From 1661622e082f955377b7f15fef789d23df3f9d59 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 16 Feb 2021 02:43:20 -0500 Subject: [PATCH] import qemu-kvm-4.2.0-34.module+el8.3.0+9828+7aab3355.3 --- .gitignore | 1 + .qemu-kvm.metadata | 1 + SOURCES/0005-Initial-redhat-build.patch | 167 + ...0006-Enable-disable-devices-for-RHEL.patch | 994 ++ ...Machine-type-related-general-changes.patch | 675 + SOURCES/0008-Add-aarch64-machine-types.patch | 276 + SOURCES/0009-Add-ppc64-machine-types.patch | 463 + SOURCES/0010-Add-s390x-machine-types.patch | 126 + SOURCES/0011-Add-x86_64-machine-types.patch | 897 + SOURCES/0012-Enable-make-check.patch | 307 + ...mber-of-devices-that-can-be-assigned.patch | 114 + ...Add-support-statement-to-help-output.patch | 58 + ...lly-limit-the-maximum-number-of-CPUs.patch | 152 + .../0016-Add-support-for-simpletrace.patch | 121 + ...documentation-instead-of-qemu-system.patch | 118 + ...18-usb-xhci-Fix-PCI-capability-order.patch | 96 + ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 69 + ...e-at-least-64kiB-pages-for-downstrea.patch | 60 + ...er-m_free-might-read-pointers-from-a.patch | 61 + SOURCES/81-kvm-rhel.rules | 1 + SOURCES/85-kvm.preset | 5 + SOURCES/95-kvm-memlock.conf | 10 + SOURCES/99-qemu-guest-agent.rules | 2 + SOURCES/README.tests | 39 + SOURCES/bridge.conf | 1 + SOURCES/ksm.service | 13 + SOURCES/ksm.sysconfig | 4 + SOURCES/ksmctl.c | 77 + SOURCES/ksmtuned | 139 + SOURCES/ksmtuned.conf | 21 + SOURCES/ksmtuned.service | 12 + ...pected-files-for-HMAT-tests-acpihmat.patch | 41 + ...-leak-memory-when-reallocation-fails.patch | 58 + ...-afte-free-in-ip_reass-CVE-2020-1983.patch | 60 + ...qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch | 55 + ...able-nested-PERF_GLOBAL_CTRL-MSR-sup.patch | 53 + ...ate-dirty_bmap-when-we-change-a-slot.patch | 115 + ...remaining-malloc-free-user-with-glib.patch | 118 + ...vm-Revert-RHEL-disable-hostmem-memfd.patch | 58 + ...n-t-let-an-operation-wait-for-itself.patch | 121 + 
...sd-fix-memory-leak-on-fuse-queueinfo.patch | 63 + ...it-APIC-ID-for-migration-instance-ID.patch | 62 + ...ove-error-for-bdrv_getlength-failure.patch | 51 + ...re-that-source-and-target-size-match.patch | 124 + ...-acquire-aio_context-in-backup_clean.patch | 57 + .../kvm-backup-top-Begin-drain-earlier.patch | 56 + ...ecursively-even-for-already-active-n.patch | 116 + ...lags-to-BlockDriver.bdrv_co_truncate.patch | 283 + ...lock-Add-flags-to-bdrv-_co-_truncate.patch | 353 + ...tion-to-truncation-of-long-NBD-expor.patch | 105 + ..._flight-during-blk_wait_while_draine.patch | 84 + ...x-cross-AioContext-blockdev-snapshot.patch | 91 + ...ix-leak-in-bdrv_create_file_fallback.patch | 60 + ...block-Generic-file-creation-fallback.patch | 227 + ...B.in_flight-for-coroutine-and-sync-i.patch | 295 + ...troduce-bdrv_reopen_commit_post-step.patch | 65 + ...Make-bdrv_get_cumulative_perm-public.patch | 67 + ...sier-to-learn-which-BDS-support-bitm.patch | 145 + ...x-restrictions-for-blockdev-snapshot.patch | 117 + ...x-blockdev-reopen-API-with-feature-f.patch | 57 + ...l-entire-LUKS-header-space-with-zero.patch | 308 + ...ck-backend-Add-flags-to-blk_truncate.patch | 294 + ...order-flush-pdiscard-function-defini.patch | 158 + ...-Don-t-acquire-context-while-droppin.patch | 130 + ...n-with-backing-file-in-different-Aio.patch | 114 + ...header-field-names-are-case-insensit.patch | 55 + ...header-fields-allow-whitespace-aroun.patch | 76 + ...ducing-bdrv_co_delete_file-interface.patch | 99 + ...vm-block-nbd-Fix-hang-in-.bdrv_close.patch | 78 + ...Driver-reference-to-the-.bdrv_co_cre.patch | 328 + ...-bitmap-reopen-into-bdrv_reopen_comm.patch | 78 + ...wn-the-fallback-image-creation-funct.patch | 296 + ...Don-t-make-backing-file-data-visible.patch | 94 + ...m-block.c-adding-bdrv_co_delete_file.patch | 92 + ...-AioContext-on-dirty-bitmap-function.patch | 176 + ...-several-bitmap-functions-to-non-sta.patch | 179 + ...bs-to-the-proper-context-on-snapshot.patch | 107 + 
...ff-basic-bitmap-operations-for-qemu-.patch | 720 + ...ing-style-issues-in-drive_backup_pre.patch | 62 + ...drv_try_set_aio_context-context-requ.patch | 204 + ...mp_blockdev_backup-and-blockdev-back.patch | 144 + ...mp_drive_backup-and-drive-backup-tra.patch | 419 + ...me-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch | 137 + ...do-not-make-qemu-ga-link-with-pixman.patch | 2463 +++ ...mpat-disable-edid-for-virtio-gpu-ccw.patch | 50 + SOURCES/kvm-config-enable-VFIO_CCW.patch | 39 + ...ost-user-Protect-slave-fd-with-mutex.patch | 134 + ...-created-file-when-block_crypto_co_c.patch | 98 + ...tures-Make-kvm-no-adjvtime-comment-c.patch | 56 + SOURCES/kvm-enable-ramfb.patch | 72 + ...error-Document-Error-API-usage-rules.patch | 156 + ...ix-examples-in-error.h-s-big-comment.patch | 87 + ...-error-Improve-error.h-s-big-comment.patch | 148 + SOURCES/kvm-error-New-macro-ERRP_GUARD.patch | 307 + ...-Free-rom-data-during-inmigrate-skip.patch | 85 + ...-file-posix-Drop-hdev_co_create_opts.patch | 131 + ...ort-BDRV_REQ_ZERO_WRITE-for-truncate.patch | 48 + ...Memory-Proximity-Domain-Attributes-S.patch | 275 + ...Memory-Side-Cache-Information-Struct.patch | 137 + ...System-Locality-Latency-and-Bandwidt.patch | 173 + ...ow-using-qdev-ID-for-qemu-io-command.patch | 100 + ...id-hot-plug-if-it-s-disabled-on-the-.patch | 77 + ...-hot-plug-capability-check-to-pre_pl.patch | 90 + ...ace-PCI_DEVICE-casts-with-existing-v.patch | 62 + ...w-default-SMBIOS-fields-for-Windows-.patch | 262 + ...d-2nd-Generation-AMD-EPYC-processors.patch | 199 + ...-i386-Add-MSR-feature-bit-for-MDS-NO.patch | 46 + SOURCES/kvm-i386-Add-macro-for-stibp.patch | 49 + ...vm-i386-Add-new-CPU-model-Cooperlake.patch | 108 + ...M-features-if-nested-SVM-is-disabled.patch | 82 + ...vm-i386-Remove-cpu64-rhel6-CPU-model.patch | 77 + ...-Resolve-CPU-models-to-v1-by-default.patch | 95 + ...roduce-kvm_kernel_irqchip_-functions.patch | 281 + ...6-Move-v3-exclusive-test-to-new-file.patch | 241 + 
...est-EIO-on-allocation-in-a-data-file.patch | 112 + ...est-EIO-on-preallocated-zero-cluster.patch | 102 + ...5-refactor-compressed-backup-to-vmdk.patch | 176 + ...-vmdk-target-tests-if-vmdk-is-not-wh.patch | 45 + ...09-Don-t-mirror-with-mismatched-size.patch | 387 + ...-229-Use-blkdebug-to-inject-an-error.patch | 120 + ...vm-iotests-Add-iothread-cases-to-155.patch | 147 + ...-skip_if_unsupported-statements-to-t.patch | 236 + SOURCES/kvm-iotests-Add-qemu_io_log.patch | 48 + ...-291-to-for-qemu-img-bitmap-coverage.patch | 253 + ...Add-test-for-image-creation-fallback.patch | 138 + ...up-with-different-source-target-size.patch | 105 + ...vm-iotests-Create-VM.blockdev_create.patch | 59 + ...ter-testfiles-out-in-filter_img_info.patch | 52 + ...tests-Fix-run_job-with-use_log-False.patch | 47 + SOURCES/kvm-iotests-Fix-test-178.patch | 59 + ...-_make_test_img-parse-its-parameters.patch | 91 + ...or-with-different-source-target-size.patch | 110 + ...r-blockdev-reopen-test-for-iothreads.patch | 122 + ...ests-Support-job-complete-in-run_job.patch | 46 + ...est-committing-to-short-backing-file.patch | 480 + ...Test-external-snapshot-with-VM-state.patch | 189 + ...dling-of-AioContexts-with-some-block.patch | 322 + ...ror-with-temporarily-disabled-target.patch | 162 + ...iotests-Use-complete_and_wait-in-155.patch | 50 + ...tests-don-t-use-format-for-drive_add.patch | 81 + ...py-Let-wait_migration-wait-even-more.patch | 123 + ...count-from-GET-LBA-STATUS-CVE-2020-1.patch | 79 + .../kvm-iscsi-Drop-iscsi_co_create_opts.patch | 113 + ...b-s-lock-individually-in-job_txn_app.patch | 213 + ...t-user-Fix-some-memtable-remap-cases.patch | 117 + ...ux-headers-support-vfio-ccw-features.patch | 77 + SOURCES/kvm-linux-headers-update-kvm.h.patch | 119 + ...-SaveStateEntry.instance_id-into-uin.patch | 179 + ...igration-Create-migration_is_running.patch | 119 + ...ation-Define-VMSTATE_INSTANCE_ID_ANY.patch | 257 + ...n-Don-t-send-data-if-we-have-stopped.patch | 42 + 
...ure-that-we-don-t-call-write-in-case.patch | 94 + ...VM-is-paused-when-migration-is-cance.patch | 70 + ...gration-Rate-limit-inside-host-pages.patch | 172 + ...ifd-clean-pages-after-filling-packet.patch | 65 + ...d-fix-destroyed-mutex-access-in-term.patch | 77 + ...d-fix-nullptr-access-in-multifd_send.patch | 75 + ...d-fix-nullptr-access-in-terminating-.patch | 68 + ...n-t-let-an-operation-wait-for-itself.patch | 123 + ...re-that-source-and-target-size-match.patch | 89 + ...-Store-MirrorOp.co-for-debuggability.patch | 51 + ...r-Wait-only-for-in-flight-operations.patch | 95 + ...e-that-we-don-t-do-any-IO-after-an-e.patch | 74 + ...-long-error-message-assertions-CVE-2.patch | 161 + ...to-provide-initiator-information-for.patch | 318 + ...to-provide-memory-latency-and-bandwi.patch | 545 + ...to-provide-memory-side-cache-informa.patch | 326 + ...-properly-check-if-numa-is-supported.patch | 81 + .../kvm-numa-remove-not-needed-check.patch | 59 + ...kvm-pc-bios-s390x-Fix-reset-psw-mask.patch | 75 + ...-s390x-Save-iplb-location-in-lowcore.patch | 145 + ...ot_port-Add-hotplug-disabling-option.patch | 153 + ...-external-interrupt-pin-in-KVM-on-re.patch | 107 + ...UPPCState-irq_input_state-with-moder.patch | 112 + ...rite-only-overlay-feature-for-blockd.patch | 64 + ...ble-use-of-g_autoptr-with-QAPI-types.patch | 239 + ...2-Expose-bitmaps-size-during-measure.patch | 495 + ...cluster_abort-for-pre-existing-clust.patch | 47 + ...alloc_cluster_abort-for-external-dat.patch | 52 + ...RO_WRITE-flag-for-full-preallocation.patch | 98 + ...ort-BDRV_REQ_ZERO_WRITE-for-truncate.patch | 101 + ...qemu-file-Don-t-do-IO-after-shutdown.patch | 92 + .../kvm-qemu-img-Add-bitmap-sub-command.patch | 398 + ...-qemu-img-Add-convert-bitmaps-option.patch | 244 + ...-Factor-out-code-for-merging-bitmaps.patch | 89 + ...d-cvtnum_full-to-print-error-reports.patch | 241 + .../kvm-qga-add-command-guest-get-disks.patch | 117 + ...ntation-of-guest-get-disks-for-Linux.patch | 431 + 
...tation-of-guest-get-disks-for-Window.patch | 182 + ...-assert-regression-on-guest-shutdown.patch | 61 + ...sing-closedir-in-qmp_guest_get_disks.patch | 56 + ...a-for-guest-get-disks-dependents-fie.patch | 115 + ...ort-BDRV_REQ_ZERO_WRITE-for-truncate.patch | 55 + ...rt-we-own-context-before-job_cancel_.patch | 57 + ...-by-one-in-update_machine_ipl_proper.patch | 54 + SOURCES/kvm-s390-ipl-sync-back-loadparm.patch | 91 + ...-sclp-improve-special-wait-psw-logic.patch | 52 + SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch | 150 + ...90x-Add-missing-vcpu-reset-functions.patch | 176 + ...x-Add-unpack-facility-feature-to-GA1.patch | 76 + .../kvm-s390x-Beautify-diag308-handling.patch | 130 + ...do-a-normal-reset-on-the-initial-cpu.patch | 52 + ...90x-Fix-cpu-normal-reset-ri-clearing.patch | 101 + SOURCES/kvm-s390x-Move-clear-reset.patch | 146 + ...nose-308-subcodes-and-rcs-into-ipl.h.patch | 83 + SOURCES/kvm-s390x-Move-initial-reset.patch | 159 + ...reset-normal-to-shared-reset-handler.patch | 145 + ...etch-and-test-the-short-psw-on-diag3.patch | 70 + ...-use-constants-for-short-PSW-address.patch | 87 + ...s-Refactor-the-css_queue_crw-routine.patch | 119 + ...idate-iplb-validity-check-into-one-f.patch | 82 + ...-kvm-Make-kvm_sclp_service_call-void.patch | 83 + ...s390x-protvirt-Add-migration-blocker.patch | 79 + ...isable-address-checks-for-PV-guest-I.patch | 135 + ...ix-stray-error_report_err-in-s390_ma.patch | 55 + ...t-Handle-SIGP-store-status-correctly.patch | 61 + ...nhibit-balloon-when-switching-to-pro.patch | 104 + ...s390x-protvirt-KVM-intercept-changes.patch | 75 + ...Move-IO-control-structures-over-SIDA.patch | 171 + ...x-protvirt-Move-STSI-data-over-SIDAD.patch | 70 + ...rotvirt-Move-diag-308-data-over-SIDA.patch | 93 + ...m-s390x-protvirt-SCLP-interpretation.patch | 172 + ...kvm-s390x-protvirt-Set-guest-IPL-PSW.patch | 75 + ...90x-protvirt-Support-unpack-facility.patch | 886 + ...llow-to-IPL-secure-guests-with-no-re.patch | 61 + 
...M_PV_PREP_RESET-command-wrapper-name.patch | 92 + .../kvm-s390x-pv-Retry-ioctls-on-EINTR.patch | 57 + ...o-ccw-Fix-build-on-systems-without-K.patch | 150 + ...90x-sigp-Fix-sense-running-reporting.patch | 49 + ...ear-local-interrupts-on-reset-normal.patch | 57 + SOURCES/kvm-s390x.conf | 19 + ...ing-of-whole-process-instead-of-thre.patch | 79 + SOURCES/kvm-setup | 49 + SOURCES/kvm-setup.service | 14 + ...ct-size-while-emulating-IRC-commands.patch | 77 + ...orrect-size-while-emulating-commands.patch | 71 + ...ger-a-CAS-reboot-for-XICS-XIVE-mode-.patch | 113 + ....3-accelerated-count-cache-flush-in-.patch | 135 + ...aximum-number-of-vCPUs-to-the-KVM-in.patch | 213 + ...-target-arm-Fix-PAuth-sbox-functions.patch | 65 + ...m-target-arm-arch_dump-Add-SVE-notes.patch | 298 + ...Add-the-kvm-no-adjvtime-CPU-property.patch | 281 + ...vm-Implement-virtual-time-adjustment.patch | 330 + ...rivial-Clean-up-header-documentation.patch | 197 + ...vm64-kvm64-cpus-have-timer-registers.patch | 60 + ...or-query-cpu-model-expansion-crashed.patch | 81 + ...ARCH_CAPABILITIES-related-bits-into-.patch | 83 + ...missed-features-to-Cooperlake-CPU-mo.patch | 103 + ...new-bit-definitions-of-MSR_IA32_ARCH.patch | 62 + ...target-i386-add-a-ucode-rev-property.patch | 125 + ...k-for-availability-of-MSR_IA32_UCODE.patch | 72 + ...ot-set-unsupported-VMX-secondary-exe.patch | 112 + ...le-monitor-and-ucode-revision-with-c.patch | 49 + ...target-i386-fix-TCG-UCODE_REV-access.patch | 73 + ...m-initialize-feature-MSRs-very-early.patch | 178 + ...initialize-microcode-revision-from-K.patch | 64 + ...the-CPUID-level-to-0x14-on-old-machi.patch | 69 + ...fail-query-sev-capabilities-if-QEMU-.patch | 56 + ...provide-proper-error-reporting-for-q.patch | 142 + ...-Enable-adapter-interruption-suppres.patch | 60 + SOURCES/kvm-tcp_emu-Fix-oob-access.patch | 59 + ...m-tcp_emu-fix-unsafe-snprintf-usages.patch | 149 + ...eatures-Check-feature-default-values.patch | 106 + ...es-test-add-test-cases-for-ACPI-HMAT.patch | 
127 + ...tor-Fix-the-bad-s390x-assembler-code.patch | 60 + ...sts-numa-Add-case-for-QMP-build-HMAT.patch | 266 + ...fuse_lowlevel-Fix-fuse_out_header-er.patch | 55 + ...ofsd-passthrough_ll-Fix-double-close.patch | 56 + SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch | 58 + ...e-update-qemu-trace-stap-to-Python-3.patch | 82 + ...sb-fix-setup_len-init-CVE-2020-14364.patch | 102 + ...-Prevent-recursion-in-usbredir_write.patch | 106 + SOURCES/kvm-util-add-slirp_fmt-helpers.patch | 140 + ...d-support-for-the-CRW-region-and-IRQ.patch | 175 + ...ccw-Add-support-for-the-schib-region.patch | 254 + SOURCES/kvm-vfio-ccw-Fix-error-message.patch | 48 + ...vm-vfio-ccw-Refactor-ccw-irq-handler.patch | 155 + ...vfio-ccw-Refactor-cleanup-of-regions.patch | 73 + ...kvm-vfio-ccw-allow-non-prefetch-ORBs.patch | 61 + ...ve-exec-permission-to-avoid-SELinux-.patch | 75 + ...emove-irqchip-notifier-if-not-regist.patch | 58 + ...Add-names-to-section-rounded-warning.patch | 53 + ...t-Only-align-sections-for-vhost-user.patch | 97 + SOURCES/kvm-vhost-coding-style-fix.patch | 56 + ...ctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch | 69 + ...Print-unexpected-slave-message-types.patch | 48 + ...host-user-fs-remove-vhostfd-property.patch | 59 + ...st-user-gpu-Drop-trailing-json-comma.patch | 52 + ...ility-to-delete-vq-through-a-pointer.patch | 80 + ...start-process-queued-requests-in-the.patch | 203 + ...tor-the-code-that-processes-queued-r.patch | 83 + ...-enable-notifications-during-polling.patch | 158 + ...io-fs-fix-MSI-X-nvectors-calculation.patch | 60 + ...-make-virtio_delete_queue-idempotent.patch | 42 + ...e-also-control-queue-when-TX-RX-dele.patch | 49 + ...o-net-fix-removal-of-failover-device.patch | 52 + ...-region-cache-when-on-queue-deletion.patch | 46 + .../kvm-virtiofs-Add-maintainers-entry.patch | 52 + ...-to-the-log-with-FUSE_LOG_DEBUG-leve.patch | 86 + ...akefile-wiring-for-virtiofsd-contrib.patch | 106 + .../kvm-virtiofsd-Add-auxiliary-.c-s.patch | 1387 ++ 
.../kvm-virtiofsd-Add-fuse_lowlevel.c.patch | 3172 ++++ .../kvm-virtiofsd-Add-main-virtio-loop.patch | 105 + ...kvm-virtiofsd-Add-options-for-virtio.patch | 103 + .../kvm-virtiofsd-Add-passthrough_ll.patch | 1387 ++ ...mestamp-to-the-log-with-FUSE_LOG_DEB.patch | 73 + ...virtiofsd-Clean-up-inodes-on-destroy.patch | 85 + ...t-lo_destroy-to-take-the-lo-mutex-lo.patch | 112 + ...op-CAP_FSETID-if-client-asked-for-it.patch | 176 + ...-virtiofsd-Fast-path-for-virtio-read.patch | 240 + ...mmon-header-and-define-for-QEMU-buil.patch | 164 + ...ta-corruption-with-O_APPEND-write-in.patch | 136 + ...fuse_daemonize-ignored-return-values.patch | 120 + .../kvm-virtiofsd-Fix-xattr-operations.patch | 327 + ...-Format-imported-files-to-qemu-style.patch | 14743 ++++++++++++++++ .../kvm-virtiofsd-Handle-hard-reboot.patch | 65 + SOURCES/kvm-virtiofsd-Handle-reinit.patch | 53 + .../kvm-virtiofsd-Keep-track-of-replies.patch | 116 + ...Kill-threads-when-queues-are-stopped.patch | 143 + ...sync-work-even-if-only-inode-is-pass.patch | 96 + ...vhost-connection-instead-of-mounting.patch | 257 + ...ofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch | 76 + ...Pass-write-iov-s-all-the-way-through.patch | 140 + ...-fuse_bufvec-through-to-do_write_buf.patch | 168 + ...kvm-virtiofsd-Poll-kick_fd-for-queue.patch | 97 + ...t-multiply-running-with-same-vhost_u.patch | 144 + ...vm-virtiofsd-Pull-in-kernel-s-fuse.h.patch | 945 + ...m-virtiofsd-Pull-in-upstream-headers.patch | 4911 +++++ ...-virtiofsd-Remove-fuse_req_getgroups.patch | 193 + ...move-unused-enum-fuse_buf_copy_flags.patch | 271 + ...Reset-O_DIRECT-flag-during-file-open.patch | 72 + ...m-virtiofsd-Send-replies-to-messages.patch | 199 + SOURCES/kvm-virtiofsd-Show-submounts.patch | 51 + .../kvm-virtiofsd-Start-queue-threads.patch | 165 + ...sd-Start-reading-commands-from-queue.patch | 200 + ...virtiofsd-Start-wiring-up-vhost-user.patch | 247 + ...virtiofsd-Support-remote-posix-locks.patch | 355 + ...m-virtiofsd-Trim-down-imported-files.patch | 1582 ++ 
...irtiofsd-Trim-out-compatibility-code.patch | 545 + ...sd-add-definition-of-fuse_buf_writev.patch | 93 + ...iofsd-add-fd-FDNUM-fd-passing-option.patch | 170 + ...kvm-virtiofsd-add-fuse_mbuf_iter-API.patch | 134 + ...iofsd-add-helper-for-lo_data-cleanup.patch | 88 + ...fsd-add-o-source-PATH-to-help-output.patch | 46 + ...tiofsd-add-print-capabilities-option.patch | 121 + ...rtiofsd-add-rlimit-nofile-NUM-option.patch | 164 + .../kvm-virtiofsd-add-seccomp-whitelist.patch | 285 + ...add-some-options-to-the-help-message.patch | 74 + ...iofsd-add-syslog-command-line-option.patch | 239 + ...ofsd-add-thread-pool-size-NUM-option.patch | 106 + ...m-virtiofsd-add-vhost-user.json-file.patch | 73 + SOURCES/kvm-virtiofsd-cap-ng-helpers.patch | 175 + ...input-buffer-size-in-fuse_lowlevel.c.patch | 1111 ++ ...fsd-cleanup-allocated-resource-in-se.patch | 82 + ...t-more-fprintf-and-perror-to-use-fus.patch | 99 + ...d-do-not-always-set-FUSE_FLOCK_LOCKS.patch | 57 + ...virtiofsd-do_read-missing-NULL-check.patch | 49 + ...ll-capabilities-in-the-wait-parent-p.patch | 67 + ...d-enable-PARALLEL_DIROPS-during-INIT.patch | 47 + ...ract-root-inode-init-into-setup_root.patch | 111 + ...hen-parent-inode-isn-t-known-in-lo_d.patch | 85 + ...virtiofsd-fix-error-handling-in-main.patch | 63 + ...correct-error-handling-in-lo_do_look.patch | 44 + ...tiofsd-fix-libfuse-information-leaks.patch | 322 + ...tiofsd-fix-lo_destroy-resource-leaks.patch | 94 + ...rtiofsd-fix-memory-leak-on-lo.source.patch | 66 + ...ate_listen_socket-error-path-socket-.patch | 56 + ...virtiofsd-get-set-features-callbacks.patch | 66 + ...uce-inode-refcount-to-prevent-use-af.patch | 589 + .../kvm-virtiofsd-jail-lo-proc_self_fd.patch | 85 + ...-virtiofsd-load_capng-missing-unlock.patch | 46 + ...tiofsd-make-f-foreground-the-default.patch | 76 + ...kvm-virtiofsd-make-lo_release-atomic.patch | 62 + ...irtiofsd-move-to-a-new-pid-namespace.patch | 223 + ...d-move-to-an-empty-network-namespace.patch | 66 + 
...only-retain-file-system-capabilities.patch | 112 + ...rough_ll-Pass-errno-to-fuse_reply_er.patch | 54 + ...rough_ll-Use-cache_readdir-for-direc.patch | 48 + ...rough_ll-add-dirp_map-to-hide-lo_dir.patch | 238 + ...through_ll-add-fallback-for-racy-ops.patch | 303 + ...rough_ll-add-fd_map-to-hide-file-des.patch | 328 + ...rough_ll-add-ino_map-to-hide-lo_inod.patch | 395 + ...rough_ll-add-lo_map-for-ino-fh-indir.patch | 182 + ...passthrough_ll-add-renameat2-support.patch | 52 + ...rough_ll-clean-up-cache-related-opti.patch | 138 + ...hrough_ll-cleanup-getxattr-listxattr.patch | 154 + ...d-passthrough_ll-control-readdirplus.patch | 79 + ...rough_ll-create-new-files-in-caller-.patch | 198 + ...rough_ll-disable-readdirplus-on-cach.patch | 50 + ...rough_ll-fix-refcounting-on-remove-r.patch | 143 + ...rtiofsd-passthrough_ll-use-hashtable.patch | 211 + ...fsd-prevent-.-escape-in-lo_do_lookup.patch | 54 + ...sd-prevent-.-escape-in-lo_do_readdir.patch | 108 + ...prevent-FUSE_INIT-FUSE_DESTROY-races.patch | 103 + ...t-fv_queue_thread-vs-virtio_loop-rac.patch | 149 + ...iofsd-prevent-races-with-lo_dirp_put.patch | 147 + ...log-only-when-priority-is-high-enoug.patch | 469 + ...sd-process-requests-in-a-thread-pool.patch | 533 + ...fsd-remove-mountpoint-dummy-argument.patch | 159 + ...d-remove-unused-notify-reply-support.patch | 294 + ...name-inode-refcount-to-inode-nlookup.patch | 139 + ...-unref_inode-to-unref_inode_lolocked.patch | 94 + ...vm-virtiofsd-sandbox-mount-namespace.patch | 166 + ...ofsd-set-maximum-RLIMIT_NOFILE-limit.patch | 93 + ...elow-fs.file-max-sysctl-value-CVE-20.patch | 88 + ...ll-queue-threads-on-exit-in-virtio_l.patch | 72 + ...t-nanosecond-resolution-for-file-tim.patch | 83 + ...se_buf_writev-to-replace-fuse_buf_wr.patch | 82 + ...se_lowlevel_is_virtio-in-fuse_sessio.patch | 56 + ...-proc-self-fd-O_PATH-file-descriptor.patch | 390 + ...te-input-buffer-sizes-in-do_write_bu.patch | 137 + ...m-virtiofsd-validate-path-components.patch | 164 + 
...-passthrough_ll-fix-fallocate-ifdefs.patch | 56 + SOURCES/kvm-x86.conf | 12 + SOURCES/kvm-xhci-recheck-slot-status.patch | 77 + SOURCES/kvm-xics-Don-t-deassert-outputs.patch | 52 + SOURCES/kvm.conf | 3 + SOURCES/qemu-ga.sysconfig | 19 + SOURCES/qemu-guest-agent.service | 20 + SOURCES/qemu-pr-helper.service | 15 + SOURCES/qemu-pr-helper.socket | 9 + SOURCES/udev-kvm-check.c | 172 + SOURCES/vhost.conf | 3 + SPECS/qemu-kvm.spec | 3652 ++++ 424 files changed, 90813 insertions(+) create mode 100644 .gitignore create mode 100644 .qemu-kvm.metadata create mode 100644 SOURCES/0005-Initial-redhat-build.patch create mode 100644 SOURCES/0006-Enable-disable-devices-for-RHEL.patch create mode 100644 SOURCES/0007-Machine-type-related-general-changes.patch create mode 100644 SOURCES/0008-Add-aarch64-machine-types.patch create mode 100644 SOURCES/0009-Add-ppc64-machine-types.patch create mode 100644 SOURCES/0010-Add-s390x-machine-types.patch create mode 100644 SOURCES/0011-Add-x86_64-machine-types.patch create mode 100644 SOURCES/0012-Enable-make-check.patch create mode 100644 SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch create mode 100644 SOURCES/0014-Add-support-statement-to-help-output.patch create mode 100644 SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 SOURCES/0016-Add-support-for-simpletrace.patch create mode 100644 SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch create mode 100644 SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch create mode 100644 SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch create mode 100644 SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch create mode 100644 SOURCES/81-kvm-rhel.rules create mode 100644 SOURCES/85-kvm.preset create mode 100644 SOURCES/95-kvm-memlock.conf create mode 100644 SOURCES/99-qemu-guest-agent.rules create 
mode 100644 SOURCES/README.tests create mode 100644 SOURCES/bridge.conf create mode 100644 SOURCES/ksm.service create mode 100644 SOURCES/ksm.sysconfig create mode 100644 SOURCES/ksmctl.c create mode 100644 SOURCES/ksmtuned create mode 100644 SOURCES/ksmtuned.conf create mode 100644 SOURCES/ksmtuned.service create mode 100644 SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch create mode 100644 SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch create mode 100644 SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch create mode 100644 SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch create mode 100644 SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch create mode 100644 SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch create mode 100644 SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch create mode 100644 SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch create mode 100644 SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch create mode 100644 SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch create mode 100644 SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch create mode 100644 SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch create mode 100644 SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch create mode 100644 SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch create mode 100644 SOURCES/kvm-backup-top-Begin-drain-earlier.patch create mode 100644 SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch create mode 100644 SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch create mode 100644 SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch create mode 100644 SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch create mode 100644 SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch create mode 
100644 SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch create mode 100644 SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch create mode 100644 SOURCES/kvm-block-Generic-file-creation-fallback.patch create mode 100644 SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch create mode 100644 SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch create mode 100644 SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch create mode 100644 SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch create mode 100644 SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch create mode 100644 SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch create mode 100644 SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch create mode 100644 SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch create mode 100644 SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch create mode 100644 SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch create mode 100644 SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch create mode 100644 SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch create mode 100644 SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch create mode 100644 SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch create mode 100644 SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch create mode 100644 SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch create mode 100644 SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch create mode 100644 SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch create mode 100644 SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch create mode 100644 SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch create mode 100644 
SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch create mode 100644 SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch create mode 100644 SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch create mode 100644 SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch create mode 100644 SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch create mode 100644 SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch create mode 100644 SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch create mode 100644 SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch create mode 100644 SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch create mode 100644 SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch create mode 100644 SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch create mode 100644 SOURCES/kvm-config-enable-VFIO_CCW.patch create mode 100644 SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch create mode 100644 SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch create mode 100644 SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch create mode 100644 SOURCES/kvm-enable-ramfb.patch create mode 100644 SOURCES/kvm-error-Document-Error-API-usage-rules.patch create mode 100644 SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch create mode 100644 SOURCES/kvm-error-Improve-error.h-s-big-comment.patch create mode 100644 SOURCES/kvm-error-New-macro-ERRP_GUARD.patch create mode 100644 SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch create mode 100644 SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch create mode 100644 SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch create mode 100644 SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch create mode 100644 
SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch create mode 100644 SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch create mode 100644 SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch create mode 100644 SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch create mode 100644 SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch create mode 100644 SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch create mode 100644 SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch create mode 100644 SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch create mode 100644 SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch create mode 100644 SOURCES/kvm-i386-Add-macro-for-stibp.patch create mode 100644 SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch create mode 100644 SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch create mode 100644 SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch create mode 100644 SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch create mode 100644 SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch create mode 100644 SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch create mode 100644 SOURCES/kvm-iotests-026-Test-EIO-on-allocation-in-a-data-file.patch create mode 100644 SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch create mode 100644 SOURCES/kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch create mode 100644 SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch create mode 100644 SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch create mode 100644 SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch create mode 100644 SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch create mode 100644 SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch create mode 100644 
SOURCES/kvm-iotests-Add-qemu_io_log.patch create mode 100644 SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch create mode 100644 SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch create mode 100644 SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch create mode 100644 SOURCES/kvm-iotests-Create-VM.blockdev_create.patch create mode 100644 SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch create mode 100644 SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch create mode 100644 SOURCES/kvm-iotests-Fix-test-178.patch create mode 100644 SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch create mode 100644 SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch create mode 100644 SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch create mode 100644 SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch create mode 100644 SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch create mode 100644 SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch create mode 100644 SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch create mode 100644 SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch create mode 100644 SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch create mode 100644 SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch create mode 100644 SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch create mode 100644 SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch create mode 100644 SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch create mode 100644 SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch create mode 100644 SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch create mode 100644 SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch create mode 100644 SOURCES/kvm-linux-headers-update-kvm.h.patch create mode 100644 
SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch create mode 100644 SOURCES/kvm-migration-Create-migration_is_running.patch create mode 100644 SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch create mode 100644 SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch create mode 100644 SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch create mode 100644 SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch create mode 100644 SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch create mode 100644 SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch create mode 100644 SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch create mode 100644 SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch create mode 100644 SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch create mode 100644 SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch create mode 100644 SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch create mode 100644 SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch create mode 100644 SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch create mode 100644 SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch create mode 100644 SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch create mode 100644 SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch create mode 100644 SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch create mode 100644 SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch create mode 100644 SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch create mode 100644 SOURCES/kvm-numa-remove-not-needed-check.patch create mode 100644 SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch create mode 100644 SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch 
create mode 100644 SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch create mode 100644 SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch create mode 100644 SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch create mode 100644 SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch create mode 100644 SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch create mode 100644 SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch create mode 100644 SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch create mode 100644 SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch create mode 100644 SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch create mode 100644 SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch create mode 100644 SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch create mode 100644 SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch create mode 100644 SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch create mode 100644 SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch create mode 100644 SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch create mode 100644 SOURCES/kvm-qga-add-command-guest-get-disks.patch create mode 100644 SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch create mode 100644 SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch create mode 100644 SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch create mode 100644 SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch create mode 100644 SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch create mode 100644 SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch create mode 100644 SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch create mode 100644 
SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch create mode 100644 SOURCES/kvm-s390-ipl-sync-back-loadparm.patch create mode 100644 SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch create mode 100644 SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch create mode 100644 SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch create mode 100644 SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch create mode 100644 SOURCES/kvm-s390x-Beautify-diag308-handling.patch create mode 100644 SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch create mode 100644 SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch create mode 100644 SOURCES/kvm-s390x-Move-clear-reset.patch create mode 100644 SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch create mode 100644 SOURCES/kvm-s390x-Move-initial-reset.patch create mode 100644 SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch create mode 100644 SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch create mode 100644 SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch create mode 100644 SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch create mode 100644 SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch create mode 100644 SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch create mode 100644 SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch create mode 
100644 SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch create mode 100644 SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch create mode 100644 SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch create mode 100644 SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch create mode 100644 SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch create mode 100644 SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch create mode 100644 SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch create mode 100644 SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch create mode 100644 SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch create mode 100644 SOURCES/kvm-s390x.conf create mode 100644 SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch create mode 100644 SOURCES/kvm-setup create mode 100644 SOURCES/kvm-setup.service create mode 100644 SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch create mode 100644 SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch create mode 100644 SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch create mode 100644 SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch create mode 100644 SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch create mode 100644 SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch create mode 100644 SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch create mode 100644 SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch create mode 100644 SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch create mode 100644 SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch create mode 100644 
SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch create mode 100644 SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch create mode 100644 SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch create mode 100644 SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch create mode 100644 SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch create mode 100644 SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch create mode 100644 SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch create mode 100644 SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch create mode 100644 SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch create mode 100644 SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch create mode 100644 SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch create mode 100644 SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch create mode 100644 SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch create mode 100644 SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch create mode 100644 SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch create mode 100644 SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch create mode 100644 SOURCES/kvm-tcp_emu-Fix-oob-access.patch create mode 100644 SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch create mode 100644 SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch create mode 100644 SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch create mode 100644 SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch create mode 100644 SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch create mode 100644 SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch create mode 100644 
SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch create mode 100644 SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch create mode 100644 SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch create mode 100644 SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch create mode 100644 SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch create mode 100644 SOURCES/kvm-util-add-slirp_fmt-helpers.patch create mode 100644 SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch create mode 100644 SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch create mode 100644 SOURCES/kvm-vfio-ccw-Fix-error-message.patch create mode 100644 SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch create mode 100644 SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch create mode 100644 SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch create mode 100644 SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch create mode 100644 SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch create mode 100644 SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch create mode 100644 SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch create mode 100644 SOURCES/kvm-vhost-coding-style-fix.patch create mode 100644 SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch create mode 100644 SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch create mode 100644 SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch create mode 100644 SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch create mode 100644 SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch create mode 100644 SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch create mode 100644 SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch create mode 100644 SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch create mode 100644 
SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch create mode 100644 SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch create mode 100644 SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch create mode 100644 SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch create mode 100644 SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch create mode 100644 SOURCES/kvm-virtiofs-Add-maintainers-entry.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch create mode 100644 SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch create mode 100644 SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch create mode 100644 SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch create mode 100644 SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch create mode 100644 SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch create mode 100644 SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch create mode 100644 SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch create mode 100644 SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch create mode 100644 SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch create mode 100644 SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch create mode 100644 SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch create mode 100644 SOURCES/kvm-virtiofsd-Handle-reinit.patch create mode 100644 
SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch create mode 100644 SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch create mode 100644 SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch create mode 100644 SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch create mode 100644 SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch create mode 100644 SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch create mode 100644 SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch create mode 100644 SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch create mode 100644 SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch create mode 100644 SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch create mode 100644 SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch create mode 100644 SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch create mode 100644 SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch create mode 100644 SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch create mode 100644 SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch create mode 100644 SOURCES/kvm-virtiofsd-Show-submounts.patch create mode 100644 SOURCES/kvm-virtiofsd-Start-queue-threads.patch create mode 100644 SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch create mode 100644 SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch create mode 100644 SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch create mode 100644 SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch create mode 100644 SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch create mode 100644 SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch create mode 100644 SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch create mode 100644 SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch create mode 100644 SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch 
create mode 100644 SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch create mode 100644 SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch create mode 100644 SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch create mode 100644 SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch create mode 100644 SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch create mode 100644 SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch create mode 100644 SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch create mode 100644 SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch create mode 100644 SOURCES/kvm-virtiofsd-cap-ng-helpers.patch create mode 100644 SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch create mode 100644 SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch create mode 100644 SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch create mode 100644 SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch create mode 100644 SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch create mode 100644 SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch create mode 100644 SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch create mode 100644 SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch create mode 100644 SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch create mode 100644 SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch create mode 100644 SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch create mode 100644 SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch create mode 100644 SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch create mode 100644 SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch create mode 100644 SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch create mode 100644 SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch 
create mode 100644 SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch create mode 100644 SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch create mode 100644 SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch create mode 100644 SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch create mode 100644 SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch create mode 100644 SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch create mode 100644 SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch create mode 100644 SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch create mode 100644 SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch create mode 100644 
SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch create mode 100644 SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch create mode 100644 SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch create mode 100644 SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch create mode 100644 SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch create mode 100644 SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch create mode 100644 SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch create mode 100644 SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch create mode 100644 SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch create mode 100644 SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch create mode 100644 SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch create mode 100644 SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch create mode 100644 SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch create mode 100644 SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch create mode 100644 SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch create mode 100644 SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch create mode 100644 SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch create mode 100644 SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch create mode 100644 SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch create mode 100644 SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch create mode 100644 SOURCES/kvm-virtiofsd-validate-path-components.patch create mode 100644 SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch create mode 100644 SOURCES/kvm-x86.conf create mode 100644 SOURCES/kvm-xhci-recheck-slot-status.patch create mode 100644 
SOURCES/kvm-xics-Don-t-deassert-outputs.patch create mode 100644 SOURCES/kvm.conf create mode 100644 SOURCES/qemu-ga.sysconfig create mode 100644 SOURCES/qemu-guest-agent.service create mode 100644 SOURCES/qemu-pr-helper.service create mode 100644 SOURCES/qemu-pr-helper.socket create mode 100644 SOURCES/udev-kvm-check.c create mode 100644 SOURCES/vhost.conf create mode 100644 SPECS/qemu-kvm.spec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..39356a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/qemu-4.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata new file mode 100644 index 0000000..f479eb3 --- /dev/null +++ b/.qemu-kvm.metadata @@ -0,0 +1 @@ +b27aa828a8457bd8551ae3c81b80cc365e1f6bfe SOURCES/qemu-4.2.0.tar.xz diff --git a/SOURCES/0005-Initial-redhat-build.patch b/SOURCES/0005-Initial-redhat-build.patch new file mode 100644 index 0000000..cde66a1 --- /dev/null +++ b/SOURCES/0005-Initial-redhat-build.patch @@ -0,0 +1,167 @@ +From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. 
In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + - Use "/share/qemu-kvm" as SHARE_SUFFIX + - We reconfigured our share to qemu-kvm to be consistent with used name + +This rebase includes changes up to qemu-kvm-4.1.0-18.el8 + +Rebase notes (3.1.0): +- added new configure options + +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + +Rebase notes (4.1.0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks +- Added BuildRequires for wget, rpm-build and python3-sphinx +- Removed new unpacked files +- Update configure line to use new options + +Rebase notes (4.2.0): +- Disable iotest run during make check +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" +- Removed qemu-ga.8 install from spec file - installed by make +- Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot 
(partial) + +Merged patches (4.1.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- 69e1fb2 enable virgla +- d4f6115 enable virgl, for real this time ... + +Signed-off-by: Danilo C. L. de Paula +--- + .gitignore | 1 + + Makefile | 3 +- + configure | 1 + + os-posix.c | 2 +- + redhat/Makefile | 82 + + redhat/Makefile.common | 51 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ + redhat/scripts/process-patches.sh | 7 +- + tests/Makefile.include | 2 +- + ui/vnc.c | 2 +- + 11 files changed, 2615 insertions(+), 9 deletions(-) + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + +diff --git a/Makefile b/Makefile +index b437a346d7..086727dbb9 100644 +--- a/Makefile ++++ b/Makefile +@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM + CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 + CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC + CAP_CFLAGS += -DCAPSTONE_HAS_X86 ++CAP_CFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS + + .PHONY: capstone/all + capstone/all: .git-submodule-status +@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" + ifdef CONFIG_POSIX + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" +- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" ++ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" +diff --git a/configure b/configure +index 
6099be1d84..16564f8ccc 100755 +--- a/configure ++++ b/configure +@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then + seccomp="no" + fi + fi ++ + ########################################## + # xen probe + +diff --git a/os-posix.c b/os-posix.c +index 86cffd2c7d..1c9f86768d 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) + /* Find a likely location for support files using the location of the binary. + For installed binaries this will be "$bindir/../share/qemu". When + running from the build tree this will be "$bindir/../pc-bios". */ +-#define SHARE_SUFFIX "/share/qemu" ++#define SHARE_SUFFIX "/share/qemu-kvm" + #define BUILD_SUFFIX "/pc-bios" + char *os_find_datadir(void) + { +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 8566f5f119..b483790cf3 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) + check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi + check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) + check-block: $(patsubst %,check-%, $(check-block-y)) +-check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree ++check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree + check-clean: + rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) + rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) +diff --git a/ui/vnc.c b/ui/vnc.c +index 87b8045afe..ecf6276f5b 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) + + #ifdef CONFIG_VNC_SASL + if (sasl) { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.21.0 + diff --git 
a/SOURCES/0006-Enable-disable-devices-for-RHEL.patch b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..b14bb1b --- /dev/null +++ b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,994 @@ +From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 11 Jan 2016 11:53:33 +0100 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-default-devices option + +Rebase notes (4.1.0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 +- Cleanup aarch64 devices +- Do not build a15mpcore.c +- Removed ide-isa.c stub file +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 + +Merged patches (4.1.0): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" 
+- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- f7587dd RHEL: disable hostmem-memfd + +Signed-off-by: Danilo C. L. de Paula +--- + Makefile.objs | 4 +- + backends/Makefile.objs | 3 +- + default-configs/aarch64-rh-devices.mak | 20 +++++ + default-configs/aarch64-softmmu.mak | 10 ++- + default-configs/ppc64-rh-devices.mak | 32 ++++++++ + default-configs/ppc64-softmmu.mak | 8 +- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 ++++ + default-configs/s390x-softmmu.mak | 4 +- + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ + default-configs/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/Makefile.objs | 2 +- + hw/block/fdc.c | 10 +++ + hw/bt/Makefile.objs | 4 +- + hw/cpu/Makefile.objs | 5 +- + hw/display/Makefile.objs | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/pci-host/i440fx.c | 4 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/Makefile.objs | 4 +- + hw/vfio/pci-quirks.c | 9 +++ + hw/vfio/pci.c | 5 ++ + qemu-options.hx | 7 +- + redhat/qemu-kvm.spec.template | 5 +- + target/arm/cpu.c | 4 +- + target/i386/cpu.c | 35 +++++++-- + target/ppc/cpu-models.c | 10 +++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 ++ + util/memfd.c | 2 +- + vl.c | 8 +- + 35 files changed, 317 insertions(+), 41 deletions(-) + create mode 100644 default-configs/aarch64-rh-devices.mak + create mode 100644 default-configs/ppc64-rh-devices.mak + create mode 100644 default-configs/rh-virtio.mak + create mode 100644 default-configs/s390x-rh-devices.mak + create mode 100644 default-configs/x86_64-rh-devices.mak + +diff --git a/Makefile.objs b/Makefile.objs +index 11ba1a36bd..fcf63e1096 100644 
+--- a/Makefile.objs ++++ b/Makefile.objs +@@ -65,8 +65,8 @@ common-obj-y += replay/ + + common-obj-y += ui/ + common-obj-m += ui/ +-common-obj-y += bt-host.o bt-vhci.o +-bt-host.o-cflags := $(BLUEZ_CFLAGS) ++#common-obj-y += bt-host.o bt-vhci.o ++#bt-host.o-cflags := $(BLUEZ_CFLAGS) + + common-obj-y += dma-helpers.o + common-obj-y += vl.o +diff --git a/backends/Makefile.objs b/backends/Makefile.objs +index f0691116e8..f328d404bf 100644 +--- a/backends/Makefile.objs ++++ b/backends/Makefile.objs +@@ -16,4 +16,5 @@ endif + + common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + +-common-obj-$(CONFIG_LINUX) += hostmem-memfd.o ++# RHEL: disable memfd ++# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..a1ed641174 +--- /dev/null ++++ b/default-configs/aarch64-rh-devices.mak +@@ -0,0 +1,20 @@ ++include rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y +diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak +index 958b1e08e4..8f6867d48a 100644 +--- a/default-configs/aarch64-softmmu.mak ++++ b/default-configs/aarch64-softmmu.mak +@@ -1,8 +1,10 @@ + # Default configuration for aarch64-softmmu + + # We support all the 32 bit boards so need all their config +-include arm-softmmu.mak ++#include arm-softmmu.mak + +-CONFIG_XLNX_ZYNQMP_ARM=y +-CONFIG_XLNX_VERSAL=y +-CONFIG_SBSA_REF=y ++#CONFIG_XLNX_ZYNQMP_ARM=y ++#CONFIG_XLNX_VERSAL=y ++#CONFIG_SBSA_REF=y ++ ++include aarch64-rh-devices.mak +diff --git a/default-configs/ppc64-rh-devices.mak 
b/default-configs/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..35f2106d06 +--- /dev/null ++++ b/default-configs/ppc64-rh-devices.mak +@@ -0,0 +1,32 @@ ++include rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XICS_KVM=y ++CONFIG_XICS_SPAPR=y ++CONFIG_XIVE=y ++CONFIG_XIVE_SPAPR=y ++CONFIG_XIVE_KVM=y +diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak +index cca52665d9..fec354f327 100644 +--- a/default-configs/ppc64-softmmu.mak ++++ b/default-configs/ppc64-softmmu.mak +@@ -1,10 +1,12 @@ + # Default configuration for ppc64-softmmu + + # Include all 32-bit boards +-include ppc-softmmu.mak ++#include ppc-softmmu.mak + + # For PowerNV +-CONFIG_POWERNV=y ++#CONFIG_POWERNV=y + + # For pSeries +-CONFIG_PSERIES=y ++#CONFIG_PSERIES=y ++ ++include ppc64-rh-devices.mak +diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/default-configs/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..c3c73fe752 +--- /dev/null ++++ b/default-configs/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include rh-virtio.mak ++ ++CONFIG_PCI=y 
++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak +index f2287a133f..3e2e388e91 100644 +--- a/default-configs/s390x-softmmu.mak ++++ b/default-configs/s390x-softmmu.mak +@@ -10,4 +10,6 @@ + + # Boards: + # +-CONFIG_S390_CCW_VIRTIO=y ++#CONFIG_S390_CCW_VIRTIO=y ++ ++include s390x-rh-devices.mak +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..d59b6d9bb5 +--- /dev/null ++++ b/default-configs/x86_64-rh-devices.mak +@@ -0,0 +1,100 @@ ++include rh-virtio.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y ++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_OPENGL=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC=y 
++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_SPICE=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak +index 64b2ee2960..b5de7e5279 100644 +--- a/default-configs/x86_64-softmmu.mak ++++ b/default-configs/x86_64-softmmu.mak +@@ -1,3 +1,5 @@ + # Default configuration for x86_64-softmmu + +-include i386-softmmu.mak ++#include i386-softmmu.mak ++ ++include x86_64-rh-devices.mak +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 2034dd749e..ab203ad448 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + + object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, +diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs +index fe749f65fd..2aa1a9efdd 100644 +--- a/hw/arm/Makefile.objs ++++ b/hw/arm/Makefile.objs +@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o + obj-$(CONFIG_ZYNQ) += xilinx_zynq.o + obj-$(CONFIG_SABRELITE) += sabrelite.o + 
+-obj-$(CONFIG_ARM_V7M) += armv7m.o ++#obj-$(CONFIG_ARM_V7M) += armv7m.o + obj-$(CONFIG_EXYNOS4) += exynos4210.o + obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o + obj-$(CONFIG_DIGIC) += digic.o +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index ac5d31e8c1..e925bac002 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -46,6 +46,8 @@ + #include "qemu/module.h" + #include "trace.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + int i, j; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + } +diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs +index 867a7d2e8a..e678e9ee3c 100644 +--- a/hw/bt/Makefile.objs ++++ b/hw/bt/Makefile.objs +@@ -1,3 +1,3 @@ +-common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +-common-obj-y += hci-csr.o ++#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o ++#common-obj-y += hci-csr.o + +diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs +index 8db9e8a7b3..1601ea93c7 100644 +--- a/hw/cpu/Makefile.objs ++++ b/hw/cpu/Makefile.objs +@@ -1,5 +1,6 @@ + obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o + obj-$(CONFIG_REALVIEW) += realview_mpcore.o + obj-$(CONFIG_A9MPCORE) += a9mpcore.o +-obj-$(CONFIG_A15MPCORE) += a15mpcore.o +-common-obj-y += core.o cluster.o ++#obj-$(CONFIG_A15MPCORE) += a15mpcore.o ++common-obj-y += core.o ++# cluster.o +diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs +index f2182e3bef..3d0cda1b52 100644 +--- a/hw/display/Makefile.objs 
++++ b/hw/display/Makefile.objs +@@ -1,8 +1,9 @@ + common-obj-$(CONFIG_DDC) += i2c-ddc.o + common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o + +-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o +-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o ++# Disabled for Red Hat Enterprise Linux ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o + + common-obj-$(CONFIG_ADS7846) += ads7846.o + common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index cd283e53b4..93afa26fda 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. 
*/ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index db313dd3b1..e14858ca64 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index f0acfd86f7..390eb6579c 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->realize = i8042_realizefn; + dc->vmsd = &vmstate_kbd_isa; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index a73f8d404e..fc73fdd6fa 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c 
+index f27131102d..17f10efae2 100644 +--- a/hw/pci-host/i440fx.c ++++ b/hw/pci-host/i440fx.c +@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { + }, + }; + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* IGD Passthrough Host Bridge. */ + typedef struct { + uint8_t offset; +@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { + .instance_size = sizeof(PCII440FXState), + .class_init = igd_passthrough_i440fx_class_init, + }; ++#endif + + static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { + static void i440fx_register_types(void) + { + type_register_static(&i440fx_info); ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + type_register_static(&igd_passthrough_i440fx_info); ++#endif + type_register_static(&i440fx_pcihost_info); + } + +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8339c4c0f8..301cd7b4e4 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs +index 303ac084a0..700a91886e 100644 +--- a/hw/usb/Makefile.objs ++++ b/hw/usb/Makefile.objs +@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o + ifeq ($(CONFIG_USB_SMARTCARD),y) + common-obj-y += dev-smartcard-reader.o + common-obj-$(CONFIG_SMARTCARD) += smartcard.mo +-smartcard.mo-objs := ccid-card-passthru.o 
ccid-card-emulated.o ++# Disabled for Red Hat Enterprise Linux: ++# smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o ++smartcard.mo-objs := ccid-card-passthru.o + smartcard.mo-cflags := $(SMARTCARD_CFLAGS) + smartcard.mo-libs := $(SMARTCARD_LIBS) + endif +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 136f3a9ad6..4505ffe48a 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) + trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * Intel IGD support + * +@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) + + return 8; /* Assume newer is compatible */ + } ++#endif + + typedef struct VFIOIGDQuirk { + struct VFIOPCIDevice *vdev; +@@ -1311,6 +1313,7 @@ typedef struct { + uint8_t len; + } IGDHostInfo; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static const IGDHostInfo igd_host_bridge_infos[] = { + {PCI_REVISION_ID, 2}, + {PCI_SUBSYSTEM_VENDOR_ID, 2}, +@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { + .write = vfio_igd_quirk_index_write, + .endianness = DEVICE_LITTLE_ENDIAN, + }; ++#endif + + static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + struct vfio_region_info *rom = NULL, *opregion = NULL, + *host = NULL, *lpc = NULL; + VFIOQuirk *quirk; +@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + uint32_t gmch; + uint16_t cmd_orig, cmd; + Error *err = NULL; ++#endif + + /* + * This must be an Intel VGA device at address 00:02.0 for us to even +@@ -1585,6 +1591,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + /* + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we + * can stuff host values into, 
so if there's already one there and it's not +@@ -1809,6 +1817,7 @@ out: + g_free(opregion); + g_free(host); + g_free(lpc); ++#endif + } + + /* +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2d40b396f2..c8534d3035 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { + }, + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static Property vfio_pci_dev_nohotplug_properties[] = { + DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_END_OF_LIST(), +@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { + .instance_size = sizeof(VFIOPCIDevice), + .class_init = vfio_pci_nohotplug_dev_class_init, + }; ++#endif + + static void register_vfio_pci_dev_type(void) + { + type_register_static(&vfio_pci_dev_info); ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + type_register_static(&vfio_pci_nohotplug_dev_info); ++#endif + } + + type_init(register_vfio_pci_dev_type) +diff --git a/qemu-options.hx b/qemu-options.hx +index 65c9473b73..fc17aca631 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2111,11 +2111,6 @@ ETEXI + + DEF("no-hpet", 0, QEMU_OPTION_no_hpet, + "-no-hpet disable HPET\n", QEMU_ARCH_I386) +-STEXI +-@item -no-hpet +-@findex -no-hpet +-Disable HPET support. 
+-ETEXI + + DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, + "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" +@@ -3125,6 +3120,7 @@ STEXI + ETEXI + DEFHEADING() + ++#if 0 + DEFHEADING(Bluetooth(R) options:) + STEXI + @table @option +@@ -3203,6 +3199,7 @@ STEXI + @end table + ETEXI + DEFHEADING() ++#endif + + #ifdef CONFIG_TPM + DEFHEADING(TPM device options:) +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 7a4ac9339b..3788fc3c4a 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) + type_register_static(&idau_interface_type_info); + + while (info->name) { +- cpu_register(info); ++ /* RHEL specific: Filter out unsupported cpu models */ ++ if (!strcmp(info->name, "cortex-a15")) ++ cpu_register(info); + info++; + } + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 69f518a21a..1b7880ae3a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { + .family = 6, + .model = 6, + .stepping = 3, +- .features[FEAT_1_EDX] = +- PPRO_FEATURES | +- CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | +- CPUID_PSE36, +- .features[FEAT_1_ECX] = +- CPUID_EXT_SSE3 | CPUID_EXT_CX16, +- .features[FEAT_8000_0001_EDX] = +- CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, ++ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, + .xlevel = 0x8000000A, +@@ -2128,6 +2128,25 @@ static 
X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, ++ { ++ .name = "cpu64-rhel6", ++ .level = 4, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 6, ++ .model = 13, ++ .stepping = 3, ++ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, ++ .xlevel = 0x8000000A, ++ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", ++ }, + { + .name = "Conroe", + .level = 10, +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 086548e9b9..1bbf378c18 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -740,8 +741,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -760,6 +763,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") 
+ POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -780,6 +784,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power9", "power9_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 7e92fb2e15..be718220d7 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 0c9d14b4b1..a02d569537 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2387,6 +2387,14 @@ void 
kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +diff --git a/util/memfd.c b/util/memfd.c +index 4a3c07e0be..3303ec9da4 100644 +--- a/util/memfd.c ++++ b/util/memfd.c +@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) + */ + bool qemu_memfd_check(unsigned int flags) + { +-#ifdef CONFIG_LINUX ++#if 0 /* RHEL: memfd support disabled */ + int mfd = memfd_create("test", flags | MFD_CLOEXEC); + + if (mfd >= 0) { +diff --git a/vl.c b/vl.c +index 6a65a64bfd..668a34577e 100644 +--- a/vl.c ++++ b/vl.c +@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + int win2k_install_hack = 0; + int singlestep = 0; + int acpi_enabled = 1; +-int no_hpet = 0; ++int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ + int fd_bootchk = 1; + static int no_reboot; + int no_shutdown = 0; +@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) + } + } + ++#if 0 // Disabled for Red Hat Enterprise Linux + /***********************************************************/ + /* Bluetooth support */ + static int nb_hcis; +@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) + error_report("bad bluetooth parameter '%s'", opt); + return 1; + } ++#endif + + static int parse_name(void *opaque, QemuOpts *opts, Error **errp) + { +@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) + } + break; + #endif ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + case QEMU_OPTION_bt: + warn_report("The bluetooth subsystem is deprecated and will " + "be removed soon. 
If the bluetooth subsystem is " +@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) + "qemu-devel@nongnu.org with your usecase."); + add_device_config(DEV_BT, optarg); + break; ++#endif + case QEMU_OPTION_audio_help: + audio_legacy_help(); + exit (0); +@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) + + tpm_init(); + ++#if 0 // Disabled for Red Hat Enterprise Linux + /* init the bluetooth world */ + if (foreach_device_config(DEV_BT, bt_parse)) + exit(1); ++#endif + + if (!xen_enabled()) { + /* On 32-bit hosts, QEMU is limited by virtual address space */ +-- +2.21.0 + diff --git a/SOURCES/0007-Machine-type-related-general-changes.patch b/SOURCES/0007-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..4ae3966 --- /dev/null +++ b/SOURCES/0007-Machine-type-related-general-changes.patch @@ -0,0 +1,675 @@ +From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Rebase changes (4.1.0): +- Removed optional flag for machine compat properties (upstream) +- Remove c3e002cb chunk from hw/net/e1000.c +- Reorder compat structures +- Use one format for compat structures +- Added compat for virtio-balloon-pci.any_layout for rhel71 + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Merged patches (4.1.0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 + +Merged patches (4.2.0): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/acpi/ich9.c | 16 ++++ + hw/acpi/piix4.c | 5 +- + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/net/e1000e.c | 21 +++++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 ++ + hw/smbios/smbios.c | 1 + + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 24 ++++++ + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 ++ + 18 files changed, 301 insertions(+), 6 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index ab203ad448..7ec26884e8 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) + s->pm.enable_tco = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy, + NULL); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt, ++ NULL); + object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", + ich9_pm_get_disable_s3, + ich9_pm_set_disable_s3, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 93aec2dd2c..3a26193cbe 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + 
.version_id = 3, + .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + use_acpi_pci_hotplug, true), +diff --git a/hw/char/serial.c b/hw/char/serial.c +index b4aa250950..0012f0e44d 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -34,6 +34,7 @@ + #include "sysemu/runstate.h" + #include "qemu/error-report.h" + #include "trace.h" ++#include "migration/migration.h" + + //#define DEBUG_SERIAL + +@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if 
(migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 1689ad3bf8..e0e0eec8bf 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -27,6 +27,176 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from 
hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-pci", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our 
naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", "vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); ++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { "virtio-blk-device", "scsi", "true" }, ++ { 
"e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", .optional = true }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. 
++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ + GlobalProperty hw_compat_4_1[] = { + { "virtio-pci", "x-pcie-flr-init", "off" }, + }; +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 873e5e9706..d1a2efe47e 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index b69fd7d8ad..d8be50a1ce 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -79,6 +79,11 @@ typedef struct E1000EState { + + E1000ECore core; + ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + } E1000EState; + + #define E1000E_MMIO_IDX 0 +@@ -94,6 +99,10 @@ typedef struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) 
++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -305,6 +314,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -476,6 +487,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -599,6 +612,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -610,6 +628,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -658,6 +677,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 88a97d756d..21d80e96cf 100644 +--- 
a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 74ae74bc5c..73820517df 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -42,6 +42,7 @@ + #include "qapi/visitor.h" + #include "exec/address-spaces.h" + #include "hw/rtc/mc146818rtc_regs.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "qapi/qapi-commands-misc-target.h" +@@ -820,6 +821,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 11d476c4a2..e6e9355384 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + 
SMBIOS_SET_DEFAULT(type2.product, product); + SMBIOS_SET_DEFAULT(type2.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 23507ad3b5..9fd87a7ad9 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; // release number + +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + + if (s->masterbus) { + USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 80988bb305..8fed2eedd6 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index 2fad4df2a7..f554b671e3 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -157,6 +157,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 41568d1837..1a23ccc412 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -61,6 +61,9 @@ 
typedef struct ICH9LPCPMRegs { + uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index de45087f34..6f85a0e032 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; ++ ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; ++ ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; ++ ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; ++ ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; ++ + #endif +diff --git a/include/hw/usb.h b/include/hw/usb.h +index c24d968a19..b353438ea0 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, + uint8_t interface_class, uint8_t interface_subclass, + uint8_t interface_protocol); + ++ ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index 354ad072fa..30c53c623b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -121,6 +121,8 @@ enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + ++bool migrate_pre_2_2; ++ + /* When we add fault 
tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 79b3dda146..0b1b0d4df5 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. ++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.21.0 + diff --git a/SOURCES/0008-Add-aarch64-machine-types.patch b/SOURCES/0008-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..5397c8b --- /dev/null +++ b/SOURCES/0008-Add-aarch64-machine-types.patch @@ -0,0 +1,276 @@ +From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Use upstream compat handling + +Rebase changes (4.1.0-rc0): +- Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init + +Rebase changes (4.2.0-rc0): +- Set numa_mem_supported + +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + +Merged patches (4.0.0): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Merged patches (4.1.0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 +++ + 2 files changed, 171 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d4bedc2607..e10839100e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -72,6 +72,7 @@ + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -98,7 +99,49 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .instance_init = rhel##m##n##s##_virt_instance_init, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; + } + ++#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return requested_pa_size > 40 ? 
requested_pa_size : 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Start with max_cpus set to 512, which is the maximum supported by KVM. ++ * The value may be reduced later when we have more information about the ++ * configuration of the particular instance. ++ */ ++ mc->max_cpus = 512; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void 
rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel820_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ /* High memory is enabled by default for RHEL */ ++ vms->highmem = true; ++ object_property_add_bool(obj, "highmem", virt_get_highmem, ++ virt_set_highmem, NULL); ++ object_property_set_description(obj, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits", ++ NULL); ++ /* ++ * Default GIC type is still v2, but became configurable for RHEL. We ++ * keep v2 instead of max as TCG CI test cases require an MSI controller ++ * and there is no userspace ITS MSI emulation available. ++ */ ++ vms->gic_version = 2; ++ object_property_add_str(obj, "gic-version", virt_get_gic_version, ++ virt_set_gic_version, NULL); ++ object_property_set_description(obj, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3 and host", NULL); ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its, NULL); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation", ++ NULL); ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. 
" ++ "Valid values are none and smmuv3", ++ NULL); ++ ++ vms->irqmap=a15irqmap; ++ virt_flash_create(vms); ++} ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 0b41083e9d..53fdf16563 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -142,6 +142,7 @@ typedef struct { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + #define VIRT_MACHINE(obj) \ + OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) +@@ -150,6 +151,16 @@ typedef struct { + #define VIRT_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++#define VIRT_MACHINE(obj) \ ++ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + + /* Return the number of used redistributor regions */ +-- +2.21.0 + diff --git a/SOURCES/0009-Add-ppc64-machine-types.patch b/SOURCES/0009-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..a3f1a54 --- /dev/null +++ b/SOURCES/0009-Add-ppc64-machine-types.patch @@ -0,0 +1,463 @@ +From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Rebase changes (4.1.0): +- Update format for compat structures + +Merged patches (4.0.0): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Merged patches (4.1.0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type + +Merged patches (4.2.0): +- bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ + include/hw/ppc/spapr.h | 1 + + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 1 + + 5 files changed, 305 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index e076f6023c..8749c72066 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->linux_pci_probe = true; + smc->smp_threads_vsmt = true; + smc->nr_xirqs = SPAPR_NR_XIRQS; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-4.2 + */ +@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4536,6 +4539,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ ++#endif + + static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", 
false); ++#endif ++ ++/* ++ * pseries-rhel8.2.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty 
spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialized ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). 
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like spapr_compat_2_9 ++ */ ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { 
TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); ++ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, ++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ 
++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. 
++ */ ++ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 301cd7b4e4..ba5a8fb82b 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + Error *local_err = NULL; ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + if (local_err) { +@@ -254,6 +256,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return; ++ } ++ + 
if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { + goto error_intc_create; + } +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index d5ab5ea7b2..aa89cc4a95 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -125,6 +125,7 @@ struct SpaprMachineClass { + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + ++ bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7de4bf3122..3e2e35342d 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index e3e82327b7..5c53801cfd 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +-- +2.21.0 + diff --git a/SOURCES/0010-Add-s390x-machine-types.patch b/SOURCES/0010-Add-s390x-machine-types.patch new file mode 100644 index 
0000000..d0f6669 --- /dev/null +++ b/SOURCES/0010-Add-s390x-machine-types.patch @@ -0,0 +1,126 @@ +From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-4.1.0): +- Use upstream compat handling + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (4.1.0): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Merged patches (4.2.0): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 69 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index d3edeef0ad..c2c83d2fce 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -615,7 +615,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ +@@ -639,6 +639,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_4_2_instance_options(MachineState *machine) + { + } +@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + 
DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); ++} ++ ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; 
++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + + static void ccw_machine_register_types(void) + { +-- +2.21.0 + diff --git a/SOURCES/0011-Add-x86_64-machine-types.patch b/SOURCES/0011-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..72a5159 --- /dev/null +++ b/SOURCES/0011-Add-x86_64-machine-types.patch @@ -0,0 +1,897 @@ +From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (qemu-4.0.0): +- Use upstream compat handling + +Rebase notes (3.1.0): +- Removed xsave changes + +Rebase notes (4.1.0): +- Updated format for compat structures + +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + +Merged patches (4.1.0): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partially) +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Merged patches (4.2.0): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 33 ++++++ + target/i386/cpu.c | 9 +- + target/i386/kvm.c | 4 + + 8 files changed, 673 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 12ff55fcfb..64001893ab 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + } + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index ac08e63604..61e70e4811 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const 
size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", 
"on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* 
pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", "off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" 
}, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", 
stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, ++}; ++const size_t pc_rhel_7_0_compat_len 
= G_N_ELEMENTS(pc_rhel_7_0_compat); ++ + void gsi_handler(void *opaque, int n, int level) + { + GSIState *s = opaque; +@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->linuxboot_dma_enabled = true; + pcmc->pvh_enabled = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->hot_add_cpu = pc_hot_add_cpu; + mc->smp_parse = pc_smp_parse; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 1bd70d1abb..bd7fdb99bb 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -53,6 +53,7 @@ + #include "cpu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ 
smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -307,6 +308,7 @@ else { + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + PCMachineState *pcms = PC_MACHINE(machine); +@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, + xenfv_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ pcmc->default_nic_model = "e1000"; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, 
pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ 
pcmc->linuxboot_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ x86mc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static 
void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 385e5cffb1..7531d8ed76 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", 
"KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 384; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 
PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ m->auto_enable_numa_with_memhp = false; ++ 
pcmc->default_nic_model = "e1000"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ pcmc->linuxboot_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 6f85a0e032..2920bdef5b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -222,6 +222,8 @@ struct MachineClass { + const char **valid_cpu_types; + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + void 
(*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); + bool ignore_boot_device_suffixes; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 1f86eba3f9..2e362c8faa 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -124,6 +124,9 @@ typedef struct PCMachineClass { + + /* use PVH to load kernels that support this feature */ + bool pvh_enabled; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + } PCMachineClass; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; ++ ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; ++ ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; ++ ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; ++ ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; ++ ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; ++ + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1b7880ae3a..790db778ab 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. ++ * The default on RHEL-6 was cpu64-rhel6. ++ * libvirt assumes that qemu64 is the default for _all_ machine-types, ++ * so we should try to keep qemu64 and cpu64-rhel6 as similar as ++ * possible. ++ */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +- .model = 6, ++ .model = 13, + .stepping = 3, + .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | + CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 1d10046a6c..86d9a1f364 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_PV_EOI_EN: + env->pv_eoi_en_msr = msrs[i].data; +-- +2.21.0 + diff --git a/SOURCES/0012-Enable-make-check.patch b/SOURCES/0012-Enable-make-check.patch new file mode 100644 index 0000000..09f7b4e --- /dev/null +++ b/SOURCES/0012-Enable-make-check.patch @@ -0,0 +1,307 @@ +From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 
Oct 2018 13:48:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianess test +- Disable device-plug-test on s390x as it use disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + +Merged patches (4.0.0): +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 + +Signed-off-by: Danilo C. L. de Paula +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/Makefile.include | 10 +++++----- + tests/boot-serial-test.c | 6 +++++- + tests/cpu-plug-test.c | 4 ++-- + tests/e1000-test.c | 2 ++ + tests/hd-geo-test.c | 4 ++++ + tests/prom-env-test.c | 4 ++++ + tests/qemu-iotests/051 | 12 ++++++------ + tests/qemu-iotests/group | 4 ++-- + tests/test-x86-cpuid-compat.c | 2 ++ + tests/usb-hcd-xhci-test.c | 4 ++++ + 11 files changed, 37 insertions(+), 17 deletions(-) + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index b483790cf3..53bdbdfee0 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +@@ -230,7 
+230,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) + check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) + + check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) +-check-qtest-ppc-y += tests/boot-order-test$(EXESUF) ++#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) + check-qtest-ppc-y += tests/prom-env-test$(EXESUF) + check-qtest-ppc-y += tests/drive_del-test$(EXESUF) + check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) +@@ -244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) +-check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) + check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-y += tests/numa-test$(EXESUF) +@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) + check-qtest-s390x-y += tests/drive_del-test$(EXESUF) +-check-qtest-s390x-y += tests/device-plug-test$(EXESUF) ++#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) + check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) + check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) + check-qtest-s390x-y += tests/migration-test$(EXESUF) +diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c +index d3a54a0ba5..33ce72b89c 100644 +--- a/tests/boot-serial-test.c ++++ 
b/tests/boot-serial-test.c +@@ -108,19 +108,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c +index 30e514bbfb..a04beae1c6 100644 +--- a/tests/cpu-plug-test.c ++++ b/tests/cpu-plug-test.c +@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/e1000-test.c b/tests/e1000-test.c +index c387984ef6..c89112d6f8 100644 +--- a/tests/e1000-test.c ++++ b/tests/e1000-test.c +@@ -22,9 +22,11 @@ struct QE1000 { + + static const char *models[] = { + "e1000", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82540em", + "e1000-82544gc", + 
"e1000-82545em", ++#endif + }; + + static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c +index 7e86c5416c..cc068bad87 100644 +--- a/tests/hd-geo-test.c ++++ b/tests/hd-geo-test.c +@@ -732,6 +732,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -951,9 +953,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c +index 61bc1d1e7b..028d45c7d7 100644 +--- a/tests/prom-env-test.c ++++ b/tests/prom-env-test.c +@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { +diff --git 
a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 53bcdbc911..b387e0c233 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -181,11 +181,11 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-drive,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; 
+diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 6b10a6a762..06cc734b26 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -92,7 +92,7 @@ + 068 rw quick + 069 rw auto quick + 070 rw quick +-071 rw auto quick ++# 071 rw auto quick -- requires whitelisted blkverify + 072 rw auto quick + 073 rw auto quick + 074 rw auto quick +@@ -120,7 +120,7 @@ + 096 rw quick + 097 rw auto backing + 098 rw auto backing quick +-099 rw auto quick ++# 099 rw auto quick -- requires whitelisted blkverify + # 100 was removed, do not reuse + 101 rw quick + 102 rw quick +diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c +index 772287bdb4..e7c075ed98 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/usb-hcd-xhci-test.c ++++ b/tests/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char 
**argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +2.21.0 + diff --git a/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..db776c4 --- /dev/null +++ b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,114 @@ +From de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. 
+ +Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 + +(cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) +(cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index c8534d3035..309535f306 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -47,6 +47,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2722,9 +2725,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 35626cd63e..0cd4803aee 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.21.0 + diff --git a/SOURCES/0014-Add-support-statement-to-help-output.patch b/SOURCES/0014-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..cb77bfe --- /dev/null +++ b/SOURCES/0014-Add-support-statement-to-help-output.patch @@ -0,0 +1,58 @@ +From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: 
<1386183197-27761-1-git-send-email-ehabkost@redhat.com> +Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +(cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) +(cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) +Signed-off-by: Danilo C. L. de Paula +--- + vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/vl.c b/vl.c +index 668a34577e..9f3e7e7733 100644 +--- a/vl.c ++++ b/vl.c +@@ -1822,9 +1822,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -1841,6 +1849,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.21.0 + diff --git a/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..cec862d --- /dev/null +++ b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,152 @@ +From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally 
limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones +(cherry picked from commit a4ceb63bdc5cbac19f5f633ec761b9de0dedb55e) +(cherry picked from commit a1f26d85171b4d554225150053700e93ba6eba10) + +redhat: globally limit the maximum number of CPUs + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-2-david@redhat.com> +Patchwork-id: 78531 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 1/2] redhat: globally limit the maximum number of CPUs +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +For RHEL, we support 240, for RHV up to 384 VCPUs. Let's limit this +globally instead of fixing up all machines. This way, we can easily +change (increase) the product specific levels later.
+ +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina + +redhat: remove manual max_cpus limitations for ppc + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-3-david@redhat.com> +Patchwork-id: 78532 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 2/2] redhat: remove manual max_cpus limitations for ppc +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +RH-Author: Andrew Jones +Message-id: <1390301212-15344-1-git-send-email-drjones@redhat.com> +Patchwork-id: 56862 +O-Subject: [RHEL7.0 qemu-kvm PATCH v6] use recommended max vcpu count +Bugzilla: 998708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Marcelo Tosatti + +The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead +of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. + +This commit matches the limit to current KVM_CAP_NR_VCPUS value. + +Signed-off-by: Danilo C. L. de Paula +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + vl.c | 18 ++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ca00daa2f5..dc3ed7f04e 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. 
++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +diff --git a/vl.c b/vl.c +index 9f3e7e7733..1550aa2aaa 100644 +--- a/vl.c ++++ b/vl.c +@@ -134,6 +134,8 @@ int main(int argc, char **argv) + + #define MAX_VIRTIO_CONSOLES 1 + ++#define RHEL_MAX_CPUS 384 ++ + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) + return NULL; + } + ++/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++static void limit_max_cpus_in_machines(void) ++{ ++ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); ++ ++ for (el = machines; el; el = el->next) { ++ MachineClass *mc = el->data; ++ ++ if (mc->max_cpus > RHEL_MAX_CPUS) { ++ mc->max_cpus = RHEL_MAX_CPUS; ++ } ++ } ++} ++ + static int machine_help_func(QemuOpts *opts, MachineState *machine) + { + ObjectProperty *prop; +@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) + "mutually exclusive"); + exit(EXIT_FAILURE); + } ++ /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++ limit_max_cpus_in_machines(); + + configure_rtc(qemu_find_opts_singleton("rtc")); + +-- +2.21.0 + diff --git a/SOURCES/0016-Add-support-for-simpletrace.patch b/SOURCES/0016-Add-support-for-simpletrace.patch new file mode 100644 index 0000000..9624855 --- /dev/null +++ b/SOURCES/0016-Add-support-for-simpletrace.patch @@ -0,0 +1,121 @@ +From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Oct 2015 09:50:17 +0200 +Subject: Add support for simpletrace + +As simpletrace is upstream, we just need to properly handle it during rpmbuild. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (3.1.0): +- Fixed python 2 to python3 switch + +Rebase notes (2.9.0): +- Added group argument for tracetool.py (upstream) + +Rebase notes (2.8.0): +- Changed tracetool.py parameters + +Merged patches (2.3.0): +- db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp +- 5292fc3 trace: add SystemTap init scripts for simpletrace bridge +- eda9e5e simpletrace: install simpletrace.py +- 85c4c8f trace: add systemtap-initscript README file to RPM + +Signed-off-by: Danilo C. L. de Paula +--- + .gitignore | 2 ++ + Makefile | 4 +++ + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + 6 files changed, 79 insertions(+), 1 deletion(-) + create mode 100644 README.systemtap + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/Makefile b/Makefile +index 086727dbb9..4254950f7f 100644 +--- a/Makefile ++++ b/Makefile +@@ -939,6 +939,10 @@ endif + $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ + done + $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/script.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" + + .PHONY: ctags + ctags: +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data 
all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. 
++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +-- +2.21.0 + diff --git a/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..ef83445 --- /dev/null +++ b/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,118 @@ +From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 30 Nov 2018 09:11:03 +0100 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/qemu-block-drivers.texi | 2 +- + docs/qemu-cpu-models.texi | 2 +- + qemu-doc.texi | 6 +++--- + qemu-options.hx | 16 ++++++++-------- + 4 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi +index 2c7ea49c32..5d0afb3dee 100644 +--- a/docs/qemu-block-drivers.texi ++++ b/docs/qemu-block-drivers.texi +@@ -2,7 +2,7 @@ + QEMU block driver reference manual + @c man end + +-@set qemu_system qemu-system-x86_64 ++@set qemu_system qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi +index f88a1def0d..c82cf8fab7 100644 +--- a/docs/qemu-cpu-models.texi ++++ b/docs/qemu-cpu-models.texi +@@ -2,7 +2,7 @@ + QEMU / KVM CPU model configuration + @c man end + +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system_x86 qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/qemu-doc.texi b/qemu-doc.texi +index 3ddf5c0a68..d460f8d2c0 100644 +--- a/qemu-doc.texi ++++ b/qemu-doc.texi +@@ -11,8 +11,8 @@ + @paragraphindent 0 + @c %**end of header + +-@set qemu_system qemu-system-x86_64 +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system qemu-kvm ++@set qemu_system_x86 qemu-kvm + + @ifinfo + @direntry +@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. + Set OpenBIOS variables in NVRAM, for example: + + @example +-qemu-system-ppc -prom-env 'auto-boot?=false' \ ++qemu-kvm -prom-env 'auto-boot?=false' \ + -prom-env 'boot-device=hd:2,\yaboot' \ + -prom-env 'boot-args=conf=hd:2,\yaboot.conf' + @end example +diff --git a/qemu-options.hx b/qemu-options.hx +index fc17aca631..df1d27b6f2 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. 
+ + Example: + @example +-qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + @end example + + @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] +@@ -3631,14 +3631,14 @@ ETEXI + + DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, + "-realtime [mlock=on|off]\n" +- " run qemu with realtime features\n" ++ " run qemu-kvm with realtime features\n" + " mlock=on|off controls mlock support (default: on)\n", + QEMU_ARCH_ALL) + STEXI + @item -realtime mlock=on|off + @findex -realtime +-Run qemu with realtime features. +-mlocking qemu and guest memory can be enabled via @option{mlock=on} ++Run qemu-kvm with realtime features. ++mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} + (enabled by default). + ETEXI + +-- +2.21.0 + diff --git a/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch b/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch new file mode 100644 index 0000000..bc6146d --- /dev/null +++ b/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -0,0 +1,96 @@ +From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 5 May 2017 19:06:14 +0200 +Subject: usb-xhci: Fix PCI capability order + +RH-Author: Dr. David Alan Gilbert +Message-id: <20170505190614.15987-2-dgilbert@redhat.com> +Patchwork-id: 75038 +O-Subject: [RHEL-7.4 qemu-kvm-rhev PATCH 1/1] usb-xhci: Fix PCI capability order +Bugzilla: 1447874 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. 
Tsirkin +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Juan Quintela + +From: "Dr. David Alan Gilbert" + +Upstream commit 1108b2f8a9 in 2.7.0 changed the order +of the PCI capability chain in the XHCI pci device in the case +where the device has the PCIe endpoint capability (i.e. only +older machine types, pc-i440fx-2.0 upstream, pc-i440fx-rhel7.0.0 +apparently for us). + +Changing the order breaks migration compatibility; fixing this +upstream would mean breaking the same case going from 2.7.0->current +that currently works 2.7.0->2.9.0 - so upstream it's a choice +of two breakages. + +Since we never released 2.7.0/2.8.0 we can fix this downstream. + +This reverts the order so that we create the capabilities in the +order: + PCIe + MSI + MSI-X + +The symptom is: +qemu-kvm: get_pci_config_device: Bad config data: i=0x71 read: a0 device: 0 cmask: ff wmask: 0 w1cmask:0 +qemu-kvm: Failed to load PCIDevice:config +qemu-kvm: Failed to load xhci:parent_obj +qemu-kvm: error while loading state for instance 0x0 of device '0000:00:0d.0/xhci' +qemu-kvm: load of migration failed: Invalid argument + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina + +-- +Rebase notes (2.9.0): +- Change in assert condition (upstream) + +(cherry picked from commit aad727a5ecde1ad4935eb8427604d4df5a1f1f35) +(cherry picked from commit 2dd7402227e77d748a7375233ac9e7feab244bda) + +Conflicts: + hw/usb/hcd-xhci.c + +(cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) +(cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/hcd-xhci.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 8fed2eedd6..d2b9744030 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + xhci->max_pstreams_mask = 0; + } + ++ if (pci_bus_is_express(pci_get_bus(dev)) || ++ xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { ++ ret = pcie_endpoint_cap_init(dev, 0xa0); ++ assert(ret > 0); ++ } ++ + if (xhci->msi != ON_OFF_AUTO_OFF) { + ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); + /* Any error other than -ENOTSUP(board's MSI support is broken) +@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, + &xhci->mem); + +- if (pci_bus_is_express(pci_get_bus(dev)) || +- xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { +- ret = pcie_endpoint_cap_init(dev, 0xa0); +- assert(ret > 0); +- } +- + if (xhci->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, xhci->numintrs, +-- +2.21.0 + diff --git a/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..e167b2e --- /dev/null +++ b/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,69 @@ +From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi 
+RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. Even if it were, the changes would be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involves even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when block jobs have been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +(cherry picked from commit b0caf00bbc35c7d89e02999bdce86e1f867728e8) +(cherry picked from commit c9c4f117d8b507c2f86035c282d537c0a327364f) +(cherry picked from commit 5d586bb2543337f0ff172c6ce942dba3acbcedff) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index e8b2b64d09..54108c0056 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + SCSIDevice *sd = SCSI_DEVICE(dev); + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert.
++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.21.0 + diff --git a/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..b3350da --- /dev/null +++ b/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. 
+ +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 481dfd2a27..805f38533e 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.21.0 + diff --git a/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch new file mode 100644 index 0000000..a2a800b --- /dev/null +++ b/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -0,0 +1,61 @@ +From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 16 Sep 2019 17:07:00 +0100 +Subject: Using ip_deq after m_free might read pointers from an allocation + reuse. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190916170700.647-2-philmd@redhat.com> +Patchwork-id: 90470 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. 
+Bugzilla: 1749737 +RH-Acked-by: Danilo de Paula +RH-Acked-by: John Snow + +From: Samuel Thibault + +This would be difficult to exploit, but that is still related with +CVE-2019-14378 which generates fragmented IP packets that would trigger this +issue and at least produce a DoS. + +Signed-off-by: Samuel Thibault +(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip_input.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +index 8c75d91495..df1c846ade 100644 +--- a/slirp/src/ip_input.c ++++ b/slirp/src/ip_input.c +@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + */ + while (q != (struct ipasfrag *)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; +@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + m_adj(dtom(slirp, q), i); + break; + } ++ prev = q; + q = q->ipf_next; +- m_free(dtom(slirp, q->ipf_prev)); +- ip_deq(q->ipf_prev); ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ + } + + insert: +-- +2.21.0 + diff --git a/SOURCES/81-kvm-rhel.rules b/SOURCES/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/SOURCES/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/SOURCES/85-kvm.preset b/SOURCES/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/SOURCES/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. 
+ +enable kvm-setup.service diff --git a/SOURCES/95-kvm-memlock.conf b/SOURCES/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/SOURCES/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. +# +* hard memlock 65536 +* soft memlock 65536 diff --git a/SOURCES/99-qemu-guest-agent.rules b/SOURCES/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/SOURCES/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/SOURCES/README.tests b/SOURCES/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/SOURCES/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run 
-p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests-src/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. + +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. diff --git a/SOURCES/bridge.conf b/SOURCES/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/SOURCES/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/SOURCES/ksm.service b/SOURCES/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/SOURCES/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/ksm.sysconfig b/SOURCES/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/SOURCES/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/SOURCES/ksmctl.c b/SOURCES/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/SOURCES/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini . + * Based on the original sysvinit script by Dan Kenigsberg + * This file is distributed under the GNU General Public License, version 2 + * or later.
*/ + +#include +#include +#include +#include +#include +#include + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/SOURCES/ksmtuned b/SOURCES/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/SOURCES/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory commited to qemu processes exceeds a threshold, and +# make ksm work harder and harder untill memory load falls below that +# threshold. 
+# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + 
npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/SOURCES/ksmtuned.conf b/SOURCES/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/SOURCES/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/SOURCES/ksmtuned.service b/SOURCES/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/SOURCES/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch b/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch new file mode 100644 index 0000000..7310f17 --- /dev/null +++ b/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch @@ -0,0 +1,41 @@ +From ff8529dcbf86b3a086d64dd630cf6a687603c571 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:55 +0100 +Subject: [PATCH 12/12] ACPI: add expected files for HMAT tests (acpihmat) + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-12-plai@redhat.com> +Patchwork-id: 96742 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 11/11] ACPI: add expected files for HMAT tests (acpihmat) +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: "Michael S. Tsirkin" + +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 48892c6c8def6624a0ed57e2bd6c2a0a9878b973) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/bios-tables-test-allowed-diff.h | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h +index 3c9e0c9..dfb8523 100644 +--- a/tests/bios-tables-test-allowed-diff.h ++++ b/tests/bios-tables-test-allowed-diff.h +@@ -1,9 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/pc/APIC.acpihmat", +-"tests/data/acpi/pc/SRAT.acpihmat", +-"tests/data/acpi/pc/HMAT.acpihmat", +-"tests/data/acpi/pc/DSDT.acpihmat", +-"tests/data/acpi/q35/APIC.acpihmat", +-"tests/data/acpi/q35/SRAT.acpihmat", +-"tests/data/acpi/q35/HMAT.acpihmat", +-"tests/data/acpi/q35/DSDT.acpihmat", +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch b/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch new file mode 100644 index 0000000..5747672 --- /dev/null +++ b/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch @@ -0,0 +1,58 @@ +From bcb6107f98d7b1edf687d7afd552a4528b7e673b Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Tue, 12 May 2020 21:15:13 +0100 +Subject: [PATCH 2/7] Don't leak memory when reallocation fails. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200512211514.1398384-2-jmaloy@redhat.com> +Patchwork-id: 96412 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] Don't leak memory when reallocation fails. +Bugzilla: 1749737 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Jindrich Novy + +Signed-off-by: Jindrich Novy +[ Marc-André - modified to use a temporary variable ] +Signed-off-by: Marc-André Lureau +(cherry picked from libslirp commit d171af3732a0610a25334b06b77fa547bd677918) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. 
de Paula +--- + slirp/src/sbuf.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +index abced48..0569c34 100644 +--- a/slirp/src/sbuf.c ++++ b/slirp/src/sbuf.c +@@ -39,13 +39,16 @@ void sbreserve(struct sbuf *sb, int size) + if (sb->sb_data) { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != size) { +- sb->sb_wptr = sb->sb_rptr = sb->sb_data = +- (char *)realloc(sb->sb_data, size); ++ char *new = realloc(sb->sb_data, size); + sb->sb_cc = 0; +- if (sb->sb_wptr) ++ if (new) { ++ sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; + sb->sb_datalen = size; +- else ++ } else { ++ free(sb->sb_data); ++ sb->sb_data = sb->sb_wptr = sb->sb_rptr = NULL; + sb->sb_datalen = 0; ++ } + } + } else { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch b/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch new file mode 100644 index 0000000..535c3af --- /dev/null +++ b/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch @@ -0,0 +1,60 @@ +From a33ea192428d9c9307f1140f3e25631a6ef7657c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Sat, 20 Jun 2020 15:02:59 -0400 +Subject: [PATCH 12/12] Fix use-afte-free in ip_reass() (CVE-2020-1983) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20200620150259.3352467-2-jmaloy@redhat.com> +Patchwork-id: 97678 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] Fix use-afte-free in ip_reass() (CVE-2020-1983) +Bugzilla: 1838070 +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +The q pointer is updated when the mbuf data is moved from m_dat to +m_ext. + +m_ext buffer may also be realloc()'ed and moved during m_cat(): +q should also be updated in this case. 
+ +Reported-by: Aviv Sasson +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault + +(cherry picked from libslirp commit 9bd6c5913271eabcb7768a58197ed3301fe19f2d) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip_input.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +index df1c846ade..0f5d522ec1 100644 +--- a/slirp/src/ip_input.c ++++ b/slirp/src/ip_input.c +@@ -329,7 +329,7 @@ insert: + q = fp->frag_link.next; + m = dtom(slirp, q); + +- int was_ext = m->m_flags & M_EXT; ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); + + q = (struct ipasfrag *)q->ipf_next; + while (q != (struct ipasfrag *)&fp->frag_link) { +@@ -353,8 +353,7 @@ insert: + * the old buffer (in the mbuf), so we must point ip + * into the new buffer. + */ +- if (!was_ext && m->m_flags & M_EXT) { +- int delta = (char *)q - m->m_dat; ++ if (m->m_flags & M_EXT) { + q = (struct ipasfrag *)(m->m_ext + delta); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch b/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch new file mode 100644 index 0000000..dce89d9 --- /dev/null +++ b/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch @@ -0,0 +1,55 @@ +From e3bec8c83459a68ae0c08e2ae0f1dbef24872d59 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:09 +0100 +Subject: [PATCH 04/26] MAINTAINERS: fix qcow2-bitmap.c under Dirty Bitmaps + header + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-2-eblake@redhat.com> +Patchwork-id: 97068 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 01/12] MAINTAINERS: fix qcow2-bitmap.c under Dirty Bitmaps header +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +From: Vladimir Sementsov-Ogievskiy + +Somehow I wrote not full path to the file. Fix that. 
+ +Also, while being here, rearrange entries, so that includes go first, +then block, than migration, than util. + +Fixes: 052db8e71444d +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 00637c6b0b67694127cc01dd75f3626da23acdaa) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + MAINTAINERS | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/MAINTAINERS b/MAINTAINERS +index d1b3e26..3a81ac9 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1873,12 +1873,12 @@ M: John Snow + R: Vladimir Sementsov-Ogievskiy + L: qemu-block@nongnu.org + S: Supported +-F: util/hbitmap.c +-F: block/dirty-bitmap.c + F: include/qemu/hbitmap.h + F: include/block/dirty-bitmap.h +-F: qcow2-bitmap.c ++F: block/dirty-bitmap.c ++F: block/qcow2-bitmap.c + F: migration/block-dirty-bitmap.c ++F: util/hbitmap.c + F: tests/test-hbitmap.c + F: docs/interop/bitmaps.rst + T: git https://github.com/jnsnow/qemu.git bitmaps +-- +1.8.3.1 + diff --git a/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch new file mode 100644 index 0000000..1435017 --- /dev/null +++ b/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch @@ -0,0 +1,53 @@ +From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 17 Jan 2020 13:18:27 +0100 +Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR + support + +RH-Author: Paolo Bonzini +Message-id: <20200117131827.20361-1-pbonzini@redhat.com> +Patchwork-id: 93405 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +Bugzilla: 1559846 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Miroslav Rezanina + +BZ: 1559846 +BRANCH: rhel-av-8.2.0 +BREW: 25775160 +UPSTREAM: RHEL only + +Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. 
Drop the +features via compat properties, they will be moved to 8.2 machine type compat +properties in the 8.3 timeframe. + +Signed-off-by: Paolo Bonzini +--- + No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the + branch. :( + + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 61e70e4..73a0f11 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + }; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000..d717ae2 --- /dev/null +++ b/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,115 @@ +From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 2 Jan 2020 11:56:51 +0000 +Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200102115651.140177-1-dgilbert@redhat.com> +Patchwork-id: 93256 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot +Bugzilla: 1772774 +RH-Acked-by: Peter Xu +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laszlo Ersek + +From: "Dr. 
David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 +branch: rhel-av-8.2.0 + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). +This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) +Signed-off-by: Danilo C. L. de Paula +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index dc3ed7f..5007bda 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). 
++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. ++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. 
++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch b/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch new file mode 100644 index 0000000..71e6e47 --- /dev/null +++ b/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch @@ -0,0 +1,118 @@ +From c012dc9b501d96a2ff54a8a7a182726043b69aeb Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Tue, 12 May 2020 21:15:14 +0100 +Subject: [PATCH 3/7] Replace remaining malloc/free user with glib +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200512211514.1398384-3-jmaloy@redhat.com> +Patchwork-id: 96413 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] Replace remaining malloc/free user with glib +Bugzilla: 1749737 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Marc-André Lureau + +glib mem functions are already used in various places. Let's not mix +the two, and instead abort on OOM conditions. + +Signed-off-by: Marc-André Lureau +(cherry picked from libslirp commit 3a494648526be4eb96cba739a816a60e933ffd14) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. 
de Paula +--- + slirp/src/sbuf.c | 21 ++++++--------------- + slirp/src/socket.c | 2 +- + slirp/src/tcp_subr.c | 8 ++------ + 3 files changed, 9 insertions(+), 22 deletions(-) + +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +index 0569c34..eab87f3 100644 +--- a/slirp/src/sbuf.c ++++ b/slirp/src/sbuf.c +@@ -9,7 +9,7 @@ static void sbappendsb(struct sbuf *sb, struct mbuf *m); + + void sbfree(struct sbuf *sb) + { +- free(sb->sb_data); ++ g_free(sb->sb_data); + } + + bool sbdrop(struct sbuf *sb, int num) +@@ -39,24 +39,15 @@ void sbreserve(struct sbuf *sb, int size) + if (sb->sb_data) { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != size) { +- char *new = realloc(sb->sb_data, size); ++ char *new = g_realloc(sb->sb_data, size); + sb->sb_cc = 0; +- if (new) { +- sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; +- sb->sb_datalen = size; +- } else { +- free(sb->sb_data); +- sb->sb_data = sb->sb_wptr = sb->sb_rptr = NULL; +- sb->sb_datalen = 0; +- } ++ sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; ++ sb->sb_datalen = size; + } + } else { +- sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_malloc(size); + sb->sb_cc = 0; +- if (sb->sb_wptr) +- sb->sb_datalen = size; +- else +- sb->sb_datalen = 0; ++ sb->sb_datalen = size; + } + } + +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +index 34daffc..ace18bf 100644 +--- a/slirp/src/socket.c ++++ b/slirp/src/socket.c +@@ -95,7 +95,7 @@ void sofree(struct socket *so) + remque(so); /* crashes if so is not in a queue */ + + if (so->so_tcpcb) { +- free(so->so_tcpcb); ++ g_free(so->so_tcpcb); + } + g_free(so); + } +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index 26d4ead..4e5a801 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -255,11 +255,7 @@ struct tcpcb *tcp_newtcpcb(struct socket *so) + { + register struct tcpcb *tp; + +- tp = (struct tcpcb *)malloc(sizeof(*tp)); +- if (tp == NULL) +- return ((struct tcpcb 
*)0); +- +- memset((char *)tp, 0, sizeof(struct tcpcb)); ++ tp = g_new0(struct tcpcb, 1); + tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; + tp->t_maxseg = (so->so_ffamily == AF_INET) ? TCP_MSS : TCP6_MSS; + +@@ -330,7 +326,7 @@ struct tcpcb *tcp_close(struct tcpcb *tp) + remque(tcpiphdr2qlink(tcpiphdr_prev(t))); + m_free(m); + } +- free(tp); ++ g_free(tp); + so->so_tcpcb = NULL; + /* clobber input socket cache if we're closing the cached connection */ + if (so == slirp->tcp_last_so) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch b/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch new file mode 100644 index 0000000..f959752 --- /dev/null +++ b/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch @@ -0,0 +1,58 @@ +From 559d5899473dea180ced39a32bfbfbf2310c6e04 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 25 May 2020 15:33:06 +0100 +Subject: [PATCH 4/7] Revert "RHEL: disable hostmem-memfd" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200525153306.15373-1-marcandre.lureau@redhat.com> +Patchwork-id: 96747 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH] Revert "RHEL: disable hostmem-memfd" +Bugzilla: 1839030 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefano Garzarella + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1839030 +BRANCH: rhel-av-8.2.1 +UPSTREAM: RHEL-only +BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28817132 + +This reverts commit f7587ddb9a2731bf678a24156b6285dda79a4b2b. + +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + backends/Makefile.objs | 3 +-- + util/memfd.c | 2 +- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/backends/Makefile.objs b/backends/Makefile.objs +index f328d40..f069111 100644 +--- a/backends/Makefile.objs ++++ b/backends/Makefile.objs +@@ -16,5 +16,4 @@ endif + + common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + +-# RHEL: disable memfd +-# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o ++common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +diff --git a/util/memfd.c b/util/memfd.c +index 3303ec9..4a3c07e 100644 +--- a/util/memfd.c ++++ b/util/memfd.c +@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) + */ + bool qemu_memfd_check(unsigned int flags) + { +-#if 0 /* RHEL: memfd support disabled */ ++#ifdef CONFIG_LINUX + int mfd = memfd_create("test", flags | MFD_CLOEXEC); + + if (mfd >= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..0c1c37f --- /dev/null +++ b/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,121 @@ +From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:23 +0100 +Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-2-kwolf@redhat.com> +Patchwork-id: 94464 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. + +The fix was incomplete as it only protected against requests waiting for +themselves, but not against requests waiting for each other. We need a +different solution. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cacbc70..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +- if (self == op) { +- continue; +- } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, self, false); ++ mirror_wait_for_any_operation(s, false); + } + + /* Perform a mirror copy operation. 
+@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, pseudo_op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + if (s->ret < 0) { +@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + } + } + +@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } + +@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, NULL, true); ++ mirror_wait_for_any_operation(s, true); + } + + if (s->ret < 0) { +@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch new file mode 100644 index 0000000..dc65c26 --- /dev/null +++ b/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch @@ -0,0 +1,63 @@ +From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:02 +0100 +Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-88-dgilbert@redhat.com> +Patchwork-id: 93542 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +For fuse's queueinfo, both queueinfo array and queueinfos are allocated in +fv_queue_set_started() but not cleaned up when the daemon process quits. + +This fixes the leak in proper places. + +Signed-off-by: Liu Bo +Signed-off-by: Eric Ren +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index b7948de..fb8d6d1 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + } + close(ourqi->kill_fd); + ourqi->kick_fd = -1; ++ free(vud->qi[qidx]); ++ vud->qi[qidx] = NULL; + } + + /* Callback from libvhost-user on start or stop of a queue */ +@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) + void virtio_session_close(struct fuse_session *se) + { + close(se->vu_socketfd); ++ ++ if (!se->virtio_dev) { ++ return; ++ } ++ ++ free(se->virtio_dev->qi); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch new file mode 100644 index 0000000..becba21 --- /dev/null +++ b/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch @@ -0,0 +1,62 @@ +From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:57 +0000 +Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-4-peterx@redhat.com> +Patchwork-id: 93628 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. 
David Alan Gilbert + +Migration is silently broken now with x2apic config like this: + + -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ + -device intel-iommu,intremap=on,eim=on + +After migration, the guest kernel could hang at anything, due to +x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so +any operations related to x2apic could be broken then (e.g., RDMSR on +x2apic MSRs could fail because KVM would think that the vcpu hasn't +enabled x2apic at all). + +The issue is that the x2apic bit was never applied correctly for vcpus +whose ID > 255 when migrate completes, and that's because when we +migrate APIC we use the APICCommonState.id as instance ID of the +migration stream, while that's too short for x2apic. + +Let's use the newly introduced initial_apic_id for that. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Eduardo Habkost +Signed-off-by: Juan Quintela +(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/apic_common.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 54b8731..b5dbeb6 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- uint32_t instance_id = s->id; ++ uint32_t instance_id = s->initial_apic_id; ++ ++ /* Normally initial APIC ID should be no more than hundreds */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch b/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch new file mode 100644 index 0000000..8fa2629 --- /dev/null +++ b/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch @@ -0,0 +1,51 @@ +From fba183faf8ce819262a1a47f8531ea68051cdce7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:19 +0100 +Subject: [PATCH 20/26] backup: Improve error for bdrv_getlength() failure + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-6-kwolf@redhat.com> +Patchwork-id: 97103 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 05/11] backup: Improve error for bdrv_getlength() failure +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +bdrv_get_device_name() will be an empty string with modern management +tools that don't use -drive. Use bdrv_get_device_or_node_name() instead +so that the node name is used if the BlockBackend is anonymous. + +While at it, start with upper case to make the message consistent with +the rest of the function. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Message-Id: <20200430142755.315494-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 58226634c4b02af7b10862f7fbd3610a344bfb7f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/backup.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index ec50946..7c6ddd2 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -408,8 +408,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + + len = bdrv_getlength(bs); + if (len < 0) { +- error_setg_errno(errp, -len, "unable to get length for '%s'", +- bdrv_get_device_name(bs)); ++ error_setg_errno(errp, -len, "Unable to get length for '%s'", ++ bdrv_get_device_or_node_name(bs)); + goto error; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch b/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch new file mode 100644 index 0000000..05b5d10 --- /dev/null +++ b/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch @@ -0,0 +1,124 @@ +From e56abd782be8bb41bb07c0317d008f95ec9a8ee5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:20 +0100 +Subject: [PATCH 21/26] backup: Make sure that source and target size match + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-7-kwolf@redhat.com> +Patchwork-id: 97107 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 06/11] backup: Make sure that source and target size match +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +Since the introduction of a backup filter node in commit 00e30f05d, the +backup block job crashes when the target image is smaller than the +source image because it will try to write after the end of the target +node without having BLK_PERM_RESIZE. 
(Previously, the BlockBackend layer +would have caught this and errored out gracefully.) + +We can fix this and even do better than the old behaviour: Check that +source and target have the same image size at the start of the block job +and unshare BLK_PERM_RESIZE. (This permission was already unshared +before the same commit 00e30f05d, but the BlockBackend that was used to +make the restriction was removed without a replacement.) This will +immediately error out when starting the job instead of only when writing +to a block that doesn't exist in the target. + +Longer target than source would technically work because we would never +write to blocks that don't exist, but semantically these are invalid, +too, because a backup is supposed to create a copy, not just an image +that starts with a copy. + +Fixes: 00e30f05de1d19586345ec373970ef4c192c6270 +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1778593 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20200430142755.315494-4-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 958a04bd32af18d9a207bcc78046e56a202aebc2) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/backup-top.c | 14 +++++++++----- + block/backup.c | 14 +++++++++++++- + 2 files changed, 22 insertions(+), 6 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index b8d863f..6756091 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -143,8 +143,10 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, + * + * Share write to target (child_file), to not interfere + * with guest writes to its disk which may be in target backing chain. ++ * Can't resize during a backup block job because we check the size ++ * only upfront. 
+ */ +- *nshared = BLK_PERM_ALL; ++ *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; + *nperm = BLK_PERM_WRITE; + } else { + /* Source child */ +@@ -154,7 +156,7 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, + if (perm & BLK_PERM_WRITE) { + *nperm = *nperm | BLK_PERM_CONSISTENT_READ; + } +- *nshared &= ~BLK_PERM_WRITE; ++ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } + } + +@@ -187,10 +189,12 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, + { + Error *local_err = NULL; + BDRVBackupTopState *state; +- BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter, +- filter_node_name, +- BDRV_O_RDWR, errp); ++ BlockDriverState *top; ++ ++ assert(source->total_sectors == target->total_sectors); + ++ top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, ++ BDRV_O_RDWR, errp); + if (!top) { + return NULL; + } +diff --git a/block/backup.c b/block/backup.c +index 7c6ddd2..821c9fb 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -348,7 +348,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque, + JobTxn *txn, Error **errp) + { +- int64_t len; ++ int64_t len, target_len; + BackupBlockJob *job = NULL; + int64_t cluster_size; + BdrvRequestFlags write_flags; +@@ -413,6 +413,18 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + goto error; + } + ++ target_len = bdrv_getlength(target); ++ if (target_len < 0) { ++ error_setg_errno(errp, -target_len, "Unable to get length for '%s'", ++ bdrv_get_device_or_node_name(bs)); ++ goto error; ++ } ++ ++ if (target_len != len) { ++ error_setg(errp, "Source and target image have different sizes"); ++ goto error; ++ } ++ + cluster_size = backup_calculate_cluster_size(target, errp); + if (cluster_size < 0) { + goto error; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch 
b/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch new file mode 100644 index 0000000..7fb76c1 --- /dev/null +++ b/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch @@ -0,0 +1,57 @@ +From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:14 +0100 +Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-4-kwolf@redhat.com> +Patchwork-id: 94596 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All code-paths leading to backup_clean (via job_clean) have the job's +context already acquired. The job's context is guaranteed to be the same +as the one used by backup_top via backup_job_create. + +Since the previous logic effectively acquired the lock twice, this +broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE +in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock +once, thus deadlocking with the IO thread. + +This is a partial revert of 0abf2581717a19. + +Signed-off-by: Stefan Reiter +Reviewed-by: Max Reitz +Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/backup.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 1383e21..ec50946 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,11 +135,7 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); +- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); +- +- aio_context_acquire(aio_context); + bdrv_backup_top_drop(s->backup_top); +- aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-top-Begin-drain-earlier.patch b/SOURCES/kvm-backup-top-Begin-drain-earlier.patch new file mode 100644 index 0000000..ef289b7 --- /dev/null +++ b/SOURCES/kvm-backup-top-Begin-drain-earlier.patch @@ -0,0 +1,56 @@ +From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:45 +0000 +Subject: [PATCH 12/18] backup-top: Begin drain earlier + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-6-slp@redhat.com> +Patchwork-id: 93757 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +When dropping backup-top, we need to drain the node before freeing the +BlockCopyState. Otherwise, requests may still be in flight and then the +assertion in shres_destroy() will fail. + +(This becomes visible in intermittent failure of 056.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191219182638.104621-1-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + block/backup-top.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 7cdb1f8..818d3f2 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + BDRVBackupTopState *s = bs->opaque; + AioContext *aio_context = bdrv_get_aio_context(bs); + +- block_copy_state_free(s->bcs); +- + aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + ++ block_copy_state_free(s->bcs); ++ + s->active = false; + bdrv_child_refresh_perms(bs, bs->backing, &error_abort); + bdrv_replace_node(bs, backing_bs(bs), &error_abort); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch b/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch new file mode 100644 index 0000000..d6cad06 --- /dev/null +++ b/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch @@ -0,0 +1,116 @@ +From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:01 +0000 +Subject: [PATCH 04/18] block: Activate recursively even for already active + nodes + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-4-kwolf@redhat.com> +Patchwork-id: 93749 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_invalidate_cache_all() assumes that all nodes in a given subtree +are either active or inactive when it starts. Therefore, as soon as it +arrives at an already active node, it stops. + +However, this assumption is wrong. For example, it's possible to take a +snapshot of an inactive node, which results in an active overlay over an +inactive backing file. The active overlay is probably also the root node +of an inactive BlockBackend (blk->disable_perm == true). 
+ +In this case, bdrv_invalidate_cache_all() does not need to do anything +to activate the overlay node, but it still needs to recurse into the +children and the parents to make sure that after returning success, +really everything is activated. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 50 ++++++++++++++++++++++++-------------------------- + 1 file changed, 24 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 473eb6e..2e5e8b6 100644 +--- a/block.c ++++ b/block.c +@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + return; + } + +- if (!(bs->open_flags & BDRV_O_INACTIVE)) { +- return; +- } +- + QLIST_FOREACH(child, &bs->children, next) { + bdrv_co_invalidate_cache(child->bs, &local_err); + if (local_err) { +@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + * just keep the extended permissions for the next time that an activation + * of the image is tried. 
+ */ +- bs->open_flags &= ~BDRV_O_INACTIVE; +- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); +- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_propagate(errp, local_err); +- return; +- } +- bdrv_set_perm(bs, perm, shared_perm); +- +- if (bs->drv->bdrv_co_invalidate_cache) { +- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); +- if (local_err) { ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ bs->open_flags &= ~BDRV_O_INACTIVE; ++ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); ++ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); ++ if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } +- } ++ bdrv_set_perm(bs, perm, shared_perm); + +- FOR_EACH_DIRTY_BITMAP(bs, bm) { +- bdrv_dirty_bitmap_skip_store(bm, false); +- } ++ if (bs->drv->bdrv_co_invalidate_cache) { ++ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); ++ if (local_err) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_propagate(errp, local_err); ++ return; ++ } ++ } + +- ret = refresh_total_sectors(bs, bs->total_sectors); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_setg_errno(errp, -ret, "Could not refresh total sector count"); +- return; ++ FOR_EACH_DIRTY_BITMAP(bs, bm) { ++ bdrv_dirty_bitmap_skip_store(bm, false); ++ } ++ ++ ret = refresh_total_sectors(bs, bs->total_sectors); ++ if (ret < 0) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_setg_errno(errp, -ret, "Could not refresh total sector count"); ++ return; ++ } + } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch b/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch new file mode 100644 index 0000000..bc67279 --- /dev/null +++ b/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch @@ -0,0 +1,283 @@ +From 
13e2076f5c4adbc9a3f96c8978150aa5e423e14a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:30 +0100 +Subject: [PATCH 02/17] block: Add flags to BlockDriver.bdrv_co_truncate() + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-2-kwolf@redhat.com> +Patchwork-id: 97448 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 01/11] block: Add flags to BlockDriver.bdrv_co_truncate() +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate() +driver callbacks, and a supported_truncate_flags field in +BlockDriverState that allows drivers to advertise support for request +flags in the context of truncate. + +For now, we always pass 0 and no drivers declare support for any flag. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Max Reitz +Message-Id: <20200424125448.63318-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 92b92799dc8662b6f71809100a4aabc1ae408ebb) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/crypto.c | 3 ++- + block/file-posix.c | 2 +- + block/file-win32.c | 2 +- + block/gluster.c | 1 + + block/io.c | 8 +++++++- + block/iscsi.c | 2 +- + block/nfs.c | 3 ++- + block/qcow2.c | 2 +- + block/qed.c | 1 + + block/raw-format.c | 2 +- + block/rbd.c | 1 + + block/sheepdog.c | 4 ++-- + block/ssh.c | 2 +- + include/block/block_int.h | 10 +++++++++- + tests/test-block-iothread.c | 3 ++- + 15 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/block/crypto.c b/block/crypto.c +index 5e3b15c..6e4b726 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -299,7 +299,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, + + static int coroutine_fn + block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, ++ Error **errp) + { + BlockCrypto *crypto = bs->opaque; + uint64_t payload_offset = +diff --git a/block/file-posix.c b/block/file-posix.c +index 1609598..7551e8d 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2021,7 +2021,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + + static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVRawState *s = bs->opaque; + struct stat st; +diff --git a/block/file-win32.c b/block/file-win32.c +index 1585983..a6b0dda 100644 +--- a/block/file-win32.c ++++ b/block/file-win32.c +@@ -469,7 +469,7 @@ static void raw_close(BlockDriverState *bs) + + static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVRawState *s = bs->opaque; + LONG low, high; +diff --git a/block/gluster.c b/block/gluster.c +index 0aa1f2c..d06df90 100644 +--- a/block/gluster.c ++++ b/block/gluster.c +@@ -1228,6 +1228,7 @@ static 
coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, ++ BdrvRequestFlags flags, + Error **errp) + { + BDRVGlusterState *s = bs->opaque; +diff --git a/block/io.c b/block/io.c +index f75777f..549e5a4 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -3320,6 +3320,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; ++ BdrvRequestFlags flags = 0; + int64_t old_size, new_bytes; + int ret; + +@@ -3370,7 +3371,12 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + } + + if (drv->bdrv_co_truncate) { +- ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); ++ if (flags & ~bs->supported_truncate_flags) { ++ error_setg(errp, "Block driver does not support requested flags"); ++ ret = -ENOTSUP; ++ goto out; ++ } ++ ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); + } else if (bs->file && drv->is_filter) { + ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); + } else { +diff --git a/block/iscsi.c b/block/iscsi.c +index 16b0716..0bea2d3 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2125,7 +2125,7 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) + + static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + IscsiLun *iscsilun = bs->opaque; + int64_t cur_length; +diff --git a/block/nfs.c b/block/nfs.c +index cc2413d..2393fbf 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -755,7 +755,8 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) + + static int coroutine_fn + nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, ++ Error **errp) + { + NFSClient 
*client = bs->opaque; + int ret; +diff --git a/block/qcow2.c b/block/qcow2.c +index dbd870a..977445e 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -3948,7 +3948,7 @@ fail: + + static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVQcow2State *s = bs->opaque; + uint64_t old_length; +diff --git a/block/qed.c b/block/qed.c +index 1af9b3c..fb6100b 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -1467,6 +1467,7 @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, ++ BdrvRequestFlags flags, + Error **errp) + { + BDRVQEDState *s = bs->opaque; +diff --git a/block/raw-format.c b/block/raw-format.c +index 4bb54f4..f994c4a 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -371,7 +371,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + + static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVRawState *s = bs->opaque; + +diff --git a/block/rbd.c b/block/rbd.c +index 8847259..fcdb60a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1090,6 +1090,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, + int64_t offset, + bool exact, + PreallocMode prealloc, ++ BdrvRequestFlags flags, + Error **errp) + { + int r; +diff --git a/block/sheepdog.c b/block/sheepdog.c +index a8a7e32..077aed8 100644 +--- a/block/sheepdog.c ++++ b/block/sheepdog.c +@@ -2288,7 +2288,7 @@ static int64_t sd_getlength(BlockDriverState *bs) + + static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVSheepdogState *s = bs->opaque; + int ret, fd; +@@ -2604,7 +2604,7 @@ static coroutine_fn int 
sd_co_writev(BlockDriverState *bs, int64_t sector_num, + + assert(!flags); + if (offset > s->inode.vdi_size) { +- ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); ++ ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } +diff --git a/block/ssh.c b/block/ssh.c +index 84e9282..9eb33df 100644 +--- a/block/ssh.c ++++ b/block/ssh.c +@@ -1298,7 +1298,7 @@ static int64_t ssh_getlength(BlockDriverState *bs) + + static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp) ++ BdrvRequestFlags flags, Error **errp) + { + BDRVSSHState *s = bs->opaque; + +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 876a83d..41f13ec 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -356,7 +356,7 @@ struct BlockDriver { + */ + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, +- Error **errp); ++ BdrvRequestFlags flags, Error **errp); + + int64_t (*bdrv_getlength)(BlockDriverState *bs); + bool has_variable_length; +@@ -849,6 +849,14 @@ struct BlockDriverState { + /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, + * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ + unsigned int supported_zero_flags; ++ /* ++ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). ++ * ++ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure ++ * that any added space reads as all zeros. If this can't be guaranteed, ++ * the operation must fail. ++ */ ++ unsigned int supported_truncate_flags; + + /* the following member gives a name to every node on the bs graph. 
*/ + char node_name[32]; +diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c +index 0c86180..2f3b763 100644 +--- a/tests/test-block-iothread.c ++++ b/tests/test-block-iothread.c +@@ -46,7 +46,8 @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, + + static int coroutine_fn + bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, ++ Error **errp) + { + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch b/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch new file mode 100644 index 0000000..3da05ff --- /dev/null +++ b/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch @@ -0,0 +1,353 @@ +From 50127f0ff9e13a15fd5bfeb2662e2404ff20f364 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:31 +0100 +Subject: [PATCH 03/17] block: Add flags to bdrv(_co)_truncate() + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-3-kwolf@redhat.com> +Patchwork-id: 97445 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 02/11] block: Add flags to bdrv(_co)_truncate() +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +Now that block drivers can support flags for .bdrv_co_truncate, expose +the parameter in the node level interfaces bdrv_co_truncate() and +bdrv_truncate(). + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Max Reitz +Message-Id: <20200424125448.63318-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7b8e4857426f2e2de2441749996c6161b550bada) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 2 +- + block/crypto.c | 2 +- + block/io.c | 12 +++++++----- + block/parallels.c | 6 +++--- + block/qcow.c | 4 ++-- + block/qcow2-refcount.c | 2 +- + block/qcow2.c | 15 +++++++++------ + block/raw-format.c | 2 +- + block/vhdx-log.c | 2 +- + block/vhdx.c | 2 +- + block/vmdk.c | 2 +- + include/block/block.h | 5 +++-- + tests/test-block-iothread.c | 6 +++--- + 13 files changed, 34 insertions(+), 28 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 38ae413..8be2006 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2144,7 +2144,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + return -ENOMEDIUM; + } + +- return bdrv_truncate(blk->root, offset, exact, prealloc, errp); ++ return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); + } + + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, +diff --git a/block/crypto.c b/block/crypto.c +index 6e4b726..fcb4a97 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -313,7 +313,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, + + offset += payload_offset; + +- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); ++ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); + } + + static void block_crypto_close(BlockDriverState *bs) +diff --git a/block/io.c b/block/io.c +index 549e5a4..3235ce5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -3315,12 +3315,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) + * 'offset' bytes in length. 
+ */ + int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, ++ Error **errp) + { + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; +- BdrvRequestFlags flags = 0; + int64_t old_size, new_bytes; + int ret; + +@@ -3378,7 +3378,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + } + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); + } else if (bs->file && drv->is_filter) { +- ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); ++ ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); + } else { + error_setg(errp, "Image format driver does not support resize"); + ret = -ENOTSUP; +@@ -3411,6 +3411,7 @@ typedef struct TruncateCo { + int64_t offset; + bool exact; + PreallocMode prealloc; ++ BdrvRequestFlags flags; + Error **errp; + int ret; + } TruncateCo; +@@ -3419,12 +3420,12 @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque) + { + TruncateCo *tco = opaque; + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, +- tco->prealloc, tco->errp); ++ tco->prealloc, tco->flags, tco->errp); + aio_wait_kick(); + } + + int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) + { + Coroutine *co; + TruncateCo tco = { +@@ -3432,6 +3433,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + .offset = offset, + .exact = exact, + .prealloc = prealloc, ++ .flags = flags, + .errp = errp, + .ret = NOT_DONE, + }; +diff --git a/block/parallels.c b/block/parallels.c +index 6d4ed77..2be92cf 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -203,7 +203,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, + } else { + ret = bdrv_truncate(bs->file, + (s->data_end + 
space) << BDRV_SECTOR_BITS, +- false, PREALLOC_MODE_OFF, NULL); ++ false, PREALLOC_MODE_OFF, 0, NULL); + } + if (ret < 0) { + return ret; +@@ -493,7 +493,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, + * That means we have to pass exact=true. + */ + ret = bdrv_truncate(bs->file, res->image_end_offset, true, +- PREALLOC_MODE_OFF, &local_err); ++ PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + res->check_errors++; +@@ -889,7 +889,7 @@ static void parallels_close(BlockDriverState *bs) + + /* errors are ignored, so we might as well pass exact=true */ + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + } + + g_free(s->bat_dirty_bmap); +diff --git a/block/qcow.c b/block/qcow.c +index 8973e4e..6b5f226 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -480,7 +480,7 @@ static int get_cluster_offset(BlockDriverState *bs, + return -E2BIG; + } + ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, +- false, PREALLOC_MODE_OFF, NULL); ++ false, PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } +@@ -1035,7 +1035,7 @@ static int qcow_make_empty(BlockDriverState *bs) + l1_length) < 0) + return -1; + ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) + return ret; + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index f67ac6b..3a90d75 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2017,7 +2017,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, + } + + ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, +- PREALLOC_MODE_OFF, &local_err); ++ PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto resize_fail; +diff --git a/block/qcow2.c b/block/qcow2.c +index 977445e..c0fdcb9 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c 
+@@ -3082,7 +3082,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + mode = PREALLOC_MODE_OFF; + } + ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, +- mode, errp); ++ mode, 0, errp); + if (ret < 0) { + return ret; + } +@@ -4044,7 +4044,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + * always fulfilled, so there is no need to pass it on.) + */ + bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, +- false, PREALLOC_MODE_OFF, &local_err); ++ false, PREALLOC_MODE_OFF, 0, &local_err); + if (local_err) { + warn_reportf_err(local_err, + "Failed to truncate the tail of the image: "); +@@ -4066,7 +4066,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + * file should be resized to the exact target size, too, + * so we pass @exact here. + */ +- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); ++ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, ++ errp); + if (ret < 0) { + goto fail; + } +@@ -4152,7 +4153,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + new_file_size = allocation_start + + nb_new_data_clusters * s->cluster_size; + /* Image file grows, so @exact does not matter */ +- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); ++ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, ++ errp); + if (ret < 0) { + error_prepend(errp, "Failed to resize underlying file: "); + qcow2_free_clusters(bs, allocation_start, +@@ -4255,7 +4257,8 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, + if (len < 0) { + return len; + } +- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); ++ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, ++ NULL); + } + + if (offset_into_cluster(s, offset)) { +@@ -4493,7 +4496,7 @@ static int make_completely_empty(BlockDriverState *bs) + } + + ret = 
bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, +- PREALLOC_MODE_OFF, &local_err); ++ PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto fail; +diff --git a/block/raw-format.c b/block/raw-format.c +index f994c4a..c3acf9a 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + + s->size = offset; + offset += s->offset; +- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); ++ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); + } + + static void raw_eject(BlockDriverState *bs, bool eject_flag) +diff --git a/block/vhdx-log.c b/block/vhdx-log.c +index 13a49c2..404fb5f 100644 +--- a/block/vhdx-log.c ++++ b/block/vhdx-log.c +@@ -558,7 +558,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, + goto exit; + } + ret = bdrv_truncate(bs->file, new_file_size, false, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + goto exit; + } +diff --git a/block/vhdx.c b/block/vhdx.c +index 33e57cd..5dfbb20 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -1264,7 +1264,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, + } + + return bdrv_truncate(bs->file, *new_offset + s->block_size, false, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + } + + /* +diff --git a/block/vmdk.c b/block/vmdk.c +index eb726f2..1bbf937 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2077,7 +2077,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + } + length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); + ret = bdrv_truncate(s->extents[i].file, length, false, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + return ret; + } +diff --git a/include/block/block.h b/include/block/block.h +index b2a3074..4913596 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -348,9 
+348,10 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + void bdrv_refresh_filename(BlockDriverState *bs); + + int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp); ++ PreallocMode prealloc, BdrvRequestFlags flags, ++ Error **errp); + int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp); ++ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + + int64_t bdrv_nb_sectors(BlockDriverState *bs); + int64_t bdrv_getlength(BlockDriverState *bs); +diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c +index 2f3b763..71e9bce 100644 +--- a/tests/test-block-iothread.c ++++ b/tests/test-block-iothread.c +@@ -186,18 +186,18 @@ static void test_sync_op_truncate(BdrvChild *c) + int ret; + + /* Normal success path */ +- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); ++ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); + g_assert_cmpint(ret, ==, 0); + + /* Early error: Negative offset */ +- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); ++ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL); + g_assert_cmpint(ret, ==, -EINVAL); + + /* Error: Read-only image */ + c->bs->read_only = true; + c->bs->open_flags &= ~BDRV_O_RDWR; + +- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); ++ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); + g_assert_cmpint(ret, ==, -EACCES); + + c->bs->read_only = false; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch b/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch new file mode 100644 index 0000000..190826f --- /dev/null +++ b/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch @@ -0,0 +1,105 @@ +From c8ecaea34f03b8ddda7d2b41b0d6f397469c8959 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 10 Jun 2020 18:32:02 
-0400 +Subject: [PATCH 2/2] block: Call attention to truncation of long NBD exports + +RH-Author: Eric Blake +Message-id: <20200610183202.3780750-3-eblake@redhat.com> +Patchwork-id: 97495 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block: Call attention to truncation of long NBD exports +Bugzilla: 1845384 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Commit 93676c88 relaxed our NBD client code to request export names up +to the NBD protocol maximum of 4096 bytes without NUL terminator, even +though the block layer can't store anything longer than 4096 bytes +including NUL terminator for display to the user. Since this means +there are some export names where we have to truncate things, we can +at least try to make the truncation a bit more obvious for the user. +Note that in spite of the truncated display name, we can still +communicate with an NBD server using such a long export name; this was +deemed nicer than refusing to even connect to such a server (since the +server may not be under our control, and since determining our actual +length limits gets tricky when nbd://host:port/export and +nbd+unix:///export?socket=/path are themselves variable-length +expansions beyond the export name but count towards the block layer +name length). 
+ +Reported-by: Xueqiang Wei +Fixes: https://bugzilla.redhat.com/1843684 +Signed-off-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200610163741.3745251-3-eblake@redhat.com> +(cherry picked from commit 5c86bdf1208916ece0b87e1151c9b48ee54faa3e) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + block.c | 7 +++++-- + block/nbd.c | 21 +++++++++++++-------- + 2 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/block.c b/block.c +index 12c8941879..57740d312e 100644 +--- a/block.c ++++ b/block.c +@@ -6683,8 +6683,11 @@ void bdrv_refresh_filename(BlockDriverState *bs) + pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); + } else { + QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); +- snprintf(bs->filename, sizeof(bs->filename), "json:%s", +- qstring_get_str(json)); ++ if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", ++ qstring_get_str(json)) >= sizeof(bs->filename)) { ++ /* Give user a hint if we truncated things. 
*/ ++ strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); ++ } + qobject_unref(json); + } + } +diff --git a/block/nbd.c b/block/nbd.c +index 927915d93d..5bb154017d 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -1978,6 +1978,7 @@ static void nbd_refresh_filename(BlockDriverState *bs) + { + BDRVNBDState *s = bs->opaque; + const char *host = NULL, *port = NULL, *path = NULL; ++ size_t len = 0; + + if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) { + const InetSocketAddress *inet = &s->saddr->u.inet; +@@ -1990,17 +1991,21 @@ static void nbd_refresh_filename(BlockDriverState *bs) + } /* else can't represent as pseudo-filename */ + + if (path && s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd+unix:///%s?socket=%s", s->export, path); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd+unix:///%s?socket=%s", s->export, path); + } else if (path && !s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd+unix://?socket=%s", path); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd+unix://?socket=%s", path); + } else if (host && s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd://%s:%s/%s", host, port, s->export); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd://%s:%s/%s", host, port, s->export); + } else if (host && !s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd://%s:%s", host, port); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd://%s:%s", host, port); ++ } ++ if (len >= sizeof(bs->exact_filename)) { ++ /* Name is too long to represent exactly, so leave it empty.
*/ ++ bs->exact_filename[0] = '\0'; + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch new file mode 100644 index 0000000..b16c0b7 --- /dev/null +++ b/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch @@ -0,0 +1,84 @@ +From f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:17 +0100 +Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-7-kwolf@redhat.com> +Patchwork-id: 94599 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Waiting in blk_wait_while_drained() while blk->in_flight is increased +for the current request is wrong because it will cause the drain +operation to deadlock. + +This patch makes sure that blk_wait_while_drained() is called with +blk->in_flight increased exactly once for the current request, and that +it temporarily decreases the counter while it waits. + +Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 610dbfa..38ae413 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, + return 0; + } + ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ + static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + { ++ assert(blk->in_flight > 0); ++ + if (blk->quiesce_counter && !blk->disable_request_queuing) { ++ blk_dec_in_flight(blk); + qemu_co_queue_wait(&blk->queued_requests, NULL); ++ blk_inc_in_flight(blk); + } + } + +@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(qiov->size == acb->bytes); + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); +@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(!qiov || qiov->size == acb->bytes); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch new file mode 100644 index 0000000..0bad890 --- /dev/null +++ b/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch @@ -0,0 +1,91 @@ +From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:37 +0000 
+Subject: [PATCH 17/20] block: Fix cross-AioContext blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-12-kwolf@redhat.com> +Patchwork-id: 94286 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +external_snapshot_prepare() tries to move the overlay to the AioContext +of the backing file (the snapshotted node). However, it's possible that +this doesn't work, but the backing file can instead be moved to the +overlay's AioContext (e.g. opening the backing chain for a mirror +target). + +bdrv_append() already indirectly uses bdrv_attach_node(), which takes +care to move nodes to make sure they use the same AioContext and which +tries both directions. + +So the problem has a simple fix: Just delete the unnecessary extra +bdrv_try_set_aio_context() call in external_snapshot_prepare() and +instead assert in bdrv_append() that both nodes were indeed moved to the +same AioContext. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-6-kwolf@redhat.com> +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 1 + + blockdev.c | 16 ---------------- + 2 files changed, 1 insertion(+), 16 deletions(-) + +diff --git a/block.c b/block.c +index 354d388..ec29b1e 100644 +--- a/block.c ++++ b/block.c +@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* Put all parents into @list and calculate their cumulative permissions */ +diff --git a/blockdev.c b/blockdev.c +index 7918533..c8d4b51 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; +- AioContext *old_context; + uint64_t perm, shared; +- int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ +@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(state->new_bs); +- aio_context_release(aio_context); +- aio_context_acquire(old_context); +- +- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- +- if (ret < 0) { +- goto out; +- } +- + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. 
*/ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch new file mode 100644 index 0000000..1735dc0 --- /dev/null +++ b/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch @@ -0,0 +1,60 @@ +From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:47 +0000 +Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> +Patchwork-id: 94229 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +@options is leaked by the first two return statements in this function. + +Note that blk_new_open() takes the reference to @options even on +failure, so all we need to do to fix the leak is to move the QDict +allocation down to where we actually need it. + +Reported-by: Coverity (CID 1419884) +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd + ("block: Generic file creation fallback") +Signed-off-by: Max Reitz +Message-Id: <20200225155618.133412-1-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 3beec7f..e1a4e38 100644 +--- a/block.c ++++ b/block.c +@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + QemuOpts *opts, Error **errp) + { + BlockBackend *blk; +- QDict *options = qdict_new(); ++ QDict *options; + int64_t size = 0; + char *buf = NULL; + PreallocMode prealloc; +@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + return -ENOTSUP; + } + ++ options = qdict_new(); + qdict_put_str(options, "driver", drv->format_name); + + blk = blk_new_open(filename, NULL, options, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Generic-file-creation-fallback.patch b/SOURCES/kvm-block-Generic-file-creation-fallback.patch new file mode 100644 index 0000000..a5dd1d7 --- /dev/null +++ b/SOURCES/kvm-block-Generic-file-creation-fallback.patch @@ -0,0 +1,227 @@ +From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:43 +0000 +Subject: [PATCH 02/20] block: Generic file creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> +Patchwork-id: 94227 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +If a protocol driver does not support image creation, we can see whether +maybe the file exists already. If so, just truncating it will be +sufficient. + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-3-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 147 insertions(+), 12 deletions(-) + +diff --git a/block.c b/block.c +index 2e5e8b6..3beec7f 100644 +--- a/block.c ++++ b/block.c +@@ -532,20 +532,139 @@ out: + return ret; + } + +-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++/** ++ * Helper function for bdrv_create_file_fallback(): Resize @blk to at ++ * least the given @minimum_size. ++ * ++ * On success, return @blk's actual length. ++ * Otherwise, return -errno. ++ */ ++static int64_t create_file_fallback_truncate(BlockBackend *blk, ++ int64_t minimum_size, Error **errp) + { +- BlockDriver *drv; ++ Error *local_err = NULL; ++ int64_t size; ++ int ret; ++ ++ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); ++ if (ret < 0 && ret != -ENOTSUP) { ++ error_propagate(errp, local_err); ++ return ret; ++ } ++ ++ size = blk_getlength(blk); ++ if (size < 0) { ++ error_free(local_err); ++ error_setg_errno(errp, -size, ++ "Failed to inquire the new image file's length"); ++ return size; ++ } ++ ++ if (size < minimum_size) { ++ /* Need to grow the image, but we failed to do that */ ++ error_propagate(errp, local_err); ++ return -ENOTSUP; ++ } ++ ++ error_free(local_err); ++ local_err = NULL; ++ ++ return size; ++} ++ ++/** ++ * Helper function for bdrv_create_file_fallback(): Zero the first ++ * sector to remove any potentially pre-existing image header. 
++ */ ++static int create_file_fallback_zero_first_sector(BlockBackend *blk, ++ int64_t current_size, ++ Error **errp) ++{ ++ int64_t bytes_to_clear; ++ int ret; ++ ++ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); ++ if (bytes_to_clear) { ++ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "Failed to clear the new image's first sector"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, ++ QemuOpts *opts, Error **errp) ++{ ++ BlockBackend *blk; ++ QDict *options = qdict_new(); ++ int64_t size = 0; ++ char *buf = NULL; ++ PreallocMode prealloc; + Error *local_err = NULL; + int ret; + ++ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); ++ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); ++ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, ++ PREALLOC_MODE_OFF, &local_err); ++ g_free(buf); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return -EINVAL; ++ } ++ ++ if (prealloc != PREALLOC_MODE_OFF) { ++ error_setg(errp, "Unsupported preallocation mode '%s'", ++ PreallocMode_str(prealloc)); ++ return -ENOTSUP; ++ } ++ ++ qdict_put_str(options, "driver", drv->format_name); ++ ++ blk = blk_new_open(filename, NULL, options, ++ BDRV_O_RDWR | BDRV_O_RESIZE, errp); ++ if (!blk) { ++ error_prepend(errp, "Protocol driver '%s' does not support image " ++ "creation, and opening the image failed: ", ++ drv->format_name); ++ return -EINVAL; ++ } ++ ++ size = create_file_fallback_truncate(blk, size, errp); ++ if (size < 0) { ++ ret = size; ++ goto out; ++ } ++ ++ ret = create_file_fallback_zero_first_sector(blk, size, errp); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ blk_unref(blk); ++ return ret; ++} ++ ++int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++{ ++ BlockDriver *drv; ++ + drv = bdrv_find_protocol(filename, true, errp); + if (drv == NULL) { + return 
-ENOENT; + } + +- ret = bdrv_create(drv, filename, opts, &local_err); +- error_propagate(errp, local_err); +- return ret; ++ if (drv->bdrv_co_create_opts) { ++ return bdrv_create(drv, filename, opts, errp); ++ } else { ++ return bdrv_create_file_fallback(filename, drv, opts, errp); ++ } + } + + /** +@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + ++static QemuOptsList fallback_create_opts = { ++ .name = "fallback-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++ .desc = { ++ { ++ .name = BLOCK_OPT_SIZE, ++ .type = QEMU_OPT_SIZE, ++ .help = "Virtual disk size" ++ }, ++ { ++ .name = BLOCK_OPT_PREALLOC, ++ .type = QEMU_OPT_STRING, ++ .help = "Preallocation mode (allowed values: off)" ++ }, ++ { /* end of list */ } ++ } ++}; ++ + /* + * Common part for opening disk images and files + * +@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- if (!proto_drv->create_opts) { +- error_setg(errp, "Protocol driver '%s' does not support image creation", +- proto_drv->format_name); +- return; +- } +- + create_opts = qemu_opts_append(create_opts, drv->create_opts); +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ if (proto_drv->create_opts) { ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ } else { ++ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ } + + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch new file mode 100644 index 0000000..463501a --- /dev/null +++ b/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch @@ -0,0 +1,295 @@ +From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:16 
+0100 +Subject: [PATCH 5/6] block: Increase BB.in_flight for coroutine and sync + interfaces + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-6-kwolf@redhat.com> +Patchwork-id: 94600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +External callers of blk_co_*() and of the synchronous blk_*() functions +don't currently increase the BlockBackend.in_flight counter, but calls +from blk_aio_*() do, so there is an inconsistency whether the counter +has been increased or not. + +This patch moves the actual operations to static functions that can +later know they will always be called with in_flight increased exactly +once, even for external callers using the blk_co_*() coroutine +interfaces. + +If the public blk_co_*() interface is unused, remove it. + +Signed-off-by: Kevin Wolf +Message-Id: <20200407121259.21350-3-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- + include/sysemu/block-backend.h | 1 - + 2 files changed, 80 insertions(+), 24 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 17b2e87..610dbfa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + } + } + +-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, +- unsigned int bytes, QEMUIOVector *qiov, +- BdrvRequestFlags flags) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + return ret; + } + +-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, +- unsigned int bytes, +- QEMUIOVector *qiov, size_t qiov_offset, +- BdrvRequestFlags flags) ++int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, QEMUIOVector *qiov, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + return ret; + } + ++int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ 
blk_inc_in_flight(blk); ++ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); + aio_wait_kick(); + } +@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, ++ qiov, 0, rwco->flags); + aio_wait_kick(); + } + +@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + .ret = NOT_DONE, + }; + ++ blk_inc_in_flight(blk); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + co_entry(&rwco); +@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + bdrv_coroutine_enter(blk_bs(blk), co); + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); + } ++ blk_dec_in_flight(blk); + + return rwco.ret; + } +@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) + } + + assert(qiov->size == acb->bytes); +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); + blk_aio_complete(acb); + } +@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) + } + + assert(!qiov || qiov->size == acb->bytes); +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, ++ 
qiov, 0, rwco->flags); + blk_aio_complete(acb); + } + +@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) + bdrv_aio_cancel_async(acb); + } + +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) + { + blk_wait_while_drained(blk); + +@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, +- qiov->iov[0].iov_base); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); + aio_wait_kick(); + } + +@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + + blk_aio_complete(acb); + } +@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); + } + +-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + { + int ret; + +@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); + } + +@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + cb, opaque); + } + ++int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); 
++ ret = blk_do_pdiscard(blk, offset, bytes); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_pdiscard_entry(void *opaque) + { + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); + aio_wait_kick(); + } + +@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); + } + +-int blk_co_flush(BlockBackend *blk) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn blk_do_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); + +@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + blk_aio_complete(acb); + } + +@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); + } + ++int coroutine_fn blk_co_flush(BlockBackend *blk) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_flush(blk); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + aio_wait_kick(); + } + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index b198dec..9bbdbd6 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + void blk_aio_cancel(BlockAIOCB *acb); + void blk_aio_cancel_async(BlockAIOCB *acb); +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch new file mode 100644 index 0000000..72c8986 --- /dev/null +++ b/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch @@ -0,0 +1,65 @@ +From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:28 +0000 +Subject: [PATCH 08/20] block: Introduce 'bdrv_reopen_commit_post' step + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-3-kwolf@redhat.com> +Patchwork-id: 94278 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Add another step in the reopen process where driver can execute code +after permission changes are committed. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 9 +++++++++ + include/block/block_int.h | 1 + + 2 files changed, 10 insertions(+) + +diff --git a/block.c b/block.c +index e1a4e38..a744bb5 100644 +--- a/block.c ++++ b/block.c +@@ -3657,6 +3657,15 @@ cleanup_perm: + } + } + } ++ ++ if (ret == 0) { ++ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { ++ BlockDriverState *bs = bs_entry->state.bs; ++ ++ if (bs->drv->bdrv_reopen_commit_post) ++ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); ++ } ++ } + cleanup: + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (ret) { +diff --git a/include/block/block_int.h b/include/block/block_int.h +index dd033d0..c168690 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -123,6 +123,7 @@ struct BlockDriver { + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); ++ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch new file mode 100644 index 0000000..2f0f999 --- /dev/null +++ b/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch @@ -0,0 +1,67 @@ +From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:33 +0000 +Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-8-kwolf@redhat.com> +Patchwork-id: 94287 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. 
Berrange +RH-Acked-by: Peter Krempa + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-2-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 6 ++---- + include/block/block_int.h | 3 +++ + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/block.c b/block.c +index 39e4647..354d388 100644 +--- a/block.c ++++ b/block.c +@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + bool *tighten_restrictions, Error **errp); + static void bdrv_child_abort_perm_update(BdrvChild *c); + static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm); + + typedef struct BlockReopenQueueEntry { + bool prepared; +@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + } + } + +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm) ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm) + { + BdrvChild *c; + uint64_t cumulative_perms = 0; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index c168690..96e327b 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + void *opaque, Error **errp); + void bdrv_root_unref_child(BdrvChild *child); + ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm); ++ + /** + * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch b/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch new file mode 100644 index 0000000..0d4a000 --- /dev/null +++ b/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch @@ -0,0 +1,145 @@ +From 41d6c207c482093df8669f7cdcdb49bb25dba741 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:12 +0100 +Subject: [PATCH 07/26] block: Make it easier to learn which BDS support + bitmaps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-5-eblake@redhat.com> +Patchwork-id: 97071 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 04/12] block: Make it easier to learn which BDS support bitmaps +Bugzilla: 1779893 1779904 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Upcoming patches will enhance bitmap support in qemu-img, but in doing +so, it turns out to be nice to suppress output when persistent bitmaps +make no sense (such as on a qcow2 v2 image). Add a hook to make this +easier to query. + +This patch adds a new callback .bdrv_supports_persistent_dirty_bitmap, +rather than trying to shoehorn the answer in via existing callbacks. +In particular, while it might have been possible to overload +.bdrv_co_can_store_new_dirty_bitmap to special-case a NULL input to +answer whether any persistent bitmaps are supported, that is at odds +with whether a particular bitmap can be stored (for example, even on +an image that supports persistent bitmaps but has currently filled up +the maximum number of bitmaps, attempts to store another one should +fail); and the new functionality doesn't require coroutine safety. 
+Similarly, we could have added one more piece of information to +.bdrv_get_info, but then again, most callers to that function tend to +already discard extraneous information, and making it a catch-all +rather than a series of dedicated scalar queries hasn't really +simplified life. + +In the future, when we improve the ability to look up bitmaps through +a filter, we will probably also want to teach the block layer to +automatically let filters pass this request on through. + +Signed-off-by: Eric Blake +Message-Id: <20200513011648.166876-4-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit ef893b5c84f3199d777e33966dc28839f71b1a5c) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + block/dirty-bitmap.c | 9 +++++++++ + block/qcow2-bitmap.c | 7 +++++++ + block/qcow2.c | 2 ++ + block/qcow2.h | 1 + + include/block/block_int.h | 1 + + include/block/dirty-bitmap.h | 1 + + 6 files changed, 21 insertions(+) + +diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c +index 7039e82..2f96acc 100644 +--- a/block/dirty-bitmap.c ++++ b/block/dirty-bitmap.c +@@ -478,6 +478,15 @@ int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, + } + } + ++bool ++bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs) ++{ ++ if (bs->drv && bs->drv->bdrv_supports_persistent_dirty_bitmap) { ++ return bs->drv->bdrv_supports_persistent_dirty_bitmap(bs); ++ } ++ return false; ++} ++ + static bool coroutine_fn + bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp) +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index c6c8ebb..cbac905 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -1759,3 +1759,10 @@ fail: + name, bdrv_get_device_or_node_name(bs)); + return false; + } ++ ++bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ ++ return s->qcow_version >= 3; ++} +diff 
--git a/block/qcow2.c b/block/qcow2.c +index af0ad4a..36b0f7d 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -5551,6 +5551,8 @@ BlockDriver bdrv_qcow2 = { + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, + ++ .bdrv_supports_persistent_dirty_bitmap = ++ qcow2_supports_persistent_dirty_bitmap, + .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap, + .bdrv_co_remove_persistent_dirty_bitmap = + qcow2_co_remove_persistent_dirty_bitmap, +diff --git a/block/qcow2.h b/block/qcow2.h +index 0942126..ceb1ceb 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -767,6 +767,7 @@ bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); ++bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); + + ssize_t coroutine_fn + qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 562dca1..cc18e8d 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -568,6 +568,7 @@ struct BlockDriver { + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + ++ bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, + const char *name, + uint32_t granularity, +diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h +index e2b20ec..f6e9a38 100644 +--- a/include/block/dirty-bitmap.h ++++ b/include/block/dirty-bitmap.h +@@ -16,6 +16,7 @@ typedef enum BitmapCheckFlags { + + #define BDRV_BITMAP_MAX_NAME_SIZE 1023 + ++bool bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs); + BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch new file mode 100644 index 0000000..de85205 --- /dev/null +++ b/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch @@ -0,0 +1,117 @@ +From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:34 +0000 +Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-9-kwolf@redhat.com> +Patchwork-id: 94285 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +blockdev-snapshot returned an error if the overlay was already in use, +which it defined as having any BlockBackend parent. This is in fact both +too strict (some parents can tolerate the change of visible data caused +by attaching a backing file) and too loose (some non-BlockBackend +parents may not be happy with it). + +One important use case that is prevented by the too strict check is live +storage migration with blockdev-mirror. Here, the target node is +usually opened without a backing file so that the active layer is +mirrored while its backing chain can be copied in the background. + +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client, so it needs a way to do this. + +blockdev-snapshot is the obvious way, so this patch makes it work in +this scenario. The new condition is that no parent uses CONSISTENT_READ +permissions. 
This will ensure that the operation will still be blocked +when the node is attached to the guest device, so blockdev-snapshot +remains safe. + +(For the sake of completeness, x-blockdev-reopen can be used to achieve +the same, however it is a big hammer, performs the graph change +completely unchecked and is still experimental. So even with the option +of using x-blockdev-reopen, there are reasons why blockdev-snapshot +should be able to perform this operation.) + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-3-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 14 ++++++++------ + tests/qemu-iotests/085.out | 4 ++-- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4cd9a58..7918533 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, + TransactionAction *action = common->action; + AioContext *aio_context; + AioContext *old_context; ++ uint64_t perm, shared; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- if (bdrv_has_blk(state->new_bs)) { ++ /* ++ * Allow attaching a backing file to an overlay that's already in use only ++ * if the parents don't assume that they are already seeing a valid image. ++ * (Specifically, allow it as a mirror target, which is write-only access.) 
++ */ ++ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); ++ if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); + goto out; + } + +- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, +- errp)) { +- goto out; +- } +- + if (state->new_bs->backing != NULL) { + error_setg(errp, "The overlay already has a backing image"); + goto out; +diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out +index bb50227..487d920 100644 +--- a/tests/qemu-iotests/085.out ++++ b/tests/qemu-iotests/085.out +@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - cannot create a snapshot using a file BDS === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } +-{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node used as active layer === + +@@ -96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - snapshot node used as backing hd === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } +-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node has a backing image === + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch new file mode 100644 index 0000000..ea796d5 --- /dev/null +++ b/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -0,0 +1,57 @@ +From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 +From: 
Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:32 +0000 +Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature + flag + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-7-kwolf@redhat.com> +Patchwork-id: 94283 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/13] block: Versioned x-blockdev-reopen API with feature flag +Bugzilla: 1790482 1805143 +RH-Acked-by: Eric Blake +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +x-blockdev-reopen is still considered unstable upstream. libvirt needs +(a small subset of) it for incremental backups, though. + +Add a downstream-only feature flag that effectively makes this a +versioned interface. As long as the feature is present, we promise that +we won't change the interface incompatibly. Incompatible changes to the +command will require us to drop the feature flag (and possibly introduce +a new one if the new version is still not stable upstream). + +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 0cf68fe..a1e85b0 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4202,10 +4202,17 @@ + # image does not have a default backing file name as part of its + # metadata. + # ++# Features: ++# @__com.redhat_rhel-av-8_2_0-api: Versioning the downstream interface while ++# it's still unstable upstream. As long as ++# this flag is present, this command will not ++# change incompatibly. 
++# + # Since: 4.0 + ## + { 'command': 'x-blockdev-reopen', +- 'data': 'BlockdevOptions', 'boxed': true } ++ 'data': 'BlockdevOptions', 'boxed': true, ++ 'features': [ '__com.redhat_rhel-av-8_2_0-api' ] } + + ## + # @blockdev-del: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch b/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch new file mode 100644 index 0000000..d1511d2 --- /dev/null +++ b/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch @@ -0,0 +1,308 @@ +From 67f36d057aa71ca56ebc17ef28a7cb70bac6c6b6 Mon Sep 17 00:00:00 2001 +From: "Daniel P. Berrange" +Date: Tue, 5 May 2020 16:46:01 +0100 +Subject: [PATCH 01/12] block: always fill entire LUKS header space with zeros +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrange +Message-id: <20200505164601.1059974-2-berrange@redhat.com> +Patchwork-id: 96277 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] block: always fill entire LUKS header space with zeros +Bugzilla: 1775462 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi + +When initializing the LUKS header the size with default encryption +parameters will currently be 2068480 bytes. This is rounded up to +a multiple of the cluster size, 2081792, with 64k sectors. If the +end of the header is not the same as the end of the cluster we fill +the extra space with zeros. This was forgetting that not even the +space allocated for the header will be fully initialized, as we +only write key material for the first key slot. The space left +for the other 7 slots is never written to. 
+ +An optimization to the ref count checking code: + + commit a5fff8d4b4d928311a5005efa12d0991fe3b66f9 (refs/bisect/bad) + Author: Vladimir Sementsov-Ogievskiy + Date: Wed Feb 27 16:14:30 2019 +0300 + + qcow2-refcount: avoid eating RAM + +made the assumption that every cluster which was allocated would +have at least some data written to it. This was violated by the way +the LUKS header is only partially written, with much space simply +reserved for future use. + +Depending on the cluster size this problem was masked by the +logic which wrote zeros between the end of the LUKS header and +the end of the cluster. + +$ qemu-img create --object secret,id=cluster_encrypt0,data=123456 \ + -f qcow2 -o cluster_size=2k,encrypt.iter-time=1,\ + encrypt.format=luks,encrypt.key-secret=cluster_encrypt0 \ + cluster_size_check.qcow2 100M + Formatting 'cluster_size_check.qcow2', fmt=qcow2 size=104857600 + encrypt.format=luks encrypt.key-secret=cluster_encrypt0 + encrypt.iter-time=1 cluster_size=2048 lazy_refcounts=off refcount_bits=16 + +$ qemu-img check --object secret,id=cluster_encrypt0,data=redhat \ + 'json:{"driver": "qcow2", "encrypt.format": "luks", \ + "encrypt.key-secret": "cluster_encrypt0", \ + "file.driver": "file", "file.filename": "cluster_size_check.qcow2"}' +ERROR: counting reference for region exceeding the end of the file by one cluster or more: offset 0x2000 size 0x1f9000 +Leaked cluster 4 refcount=1 reference=0 +...snip... +Leaked cluster 130 refcount=1 reference=0 + +1 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Image end offset: 268288 + +The problem only exists when the disk image is entirely empty. Writing +data to the disk image payload will solve the problem by causing the +end of the file to be extended further. 
+ +The change fixes it by ensuring that the entire allocated LUKS header +region is fully initialized with zeros. The qemu-img check will still +fail for any pre-existing disk images created prior to this change, +unless at least 1 byte of the payload is written to. + +Fully writing zeros to the entire LUKS header is a good idea regardless +as it ensures that space has been allocated on the host filesystem (or +whatever block storage backend is used). + +Signed-off-by: Daniel P. Berrangé +Message-Id: <20200207135520.2669430-1-berrange@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Max Reitz +(cherry picked from commit 087ab8e775f48766068e65de1bc99d03b40d1670) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + tests/qemu-iotests/group: no test 283 in downstream + +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2.c | 11 ++++-- + tests/qemu-iotests/284 | 97 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/284.out | 62 +++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 4 files changed, 167 insertions(+), 4 deletions(-) + create mode 100755 tests/qemu-iotests/284 + create mode 100644 tests/qemu-iotests/284.out + +diff --git a/block/qcow2.c b/block/qcow2.c +index 71067c6..af0ad4a 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -135,13 +135,16 @@ static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, + s->crypto_header.length = headerlen; + s->crypto_header.offset = ret; + +- /* Zero fill remaining space in cluster so it has predictable +- * content in case of future spec changes */ ++ /* ++ * Zero fill all space in cluster so it has predictable ++ * content, as we may not initialize some regions of the ++ * header (eg only 1 out of 8 key slots will be initialized) ++ */ + clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; + assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); + ret = bdrv_pwrite_zeroes(bs->file, +- ret + headerlen, +- clusterlen - headerlen, 
0); ++ ret, ++ clusterlen, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not zero fill encryption header"); + return -1; +diff --git a/tests/qemu-iotests/284 b/tests/qemu-iotests/284 +new file mode 100755 +index 0000000..071e89b +--- /dev/null ++++ b/tests/qemu-iotests/284 +@@ -0,0 +1,97 @@ ++#!/usr/bin/env bash ++# ++# Test ref count checks on encrypted images ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=berrange@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto generic ++_supported_os Linux ++ ++ ++size=1M ++ ++SECRET="secret,id=sec0,data=astrochicken" ++ ++IMGSPEC="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec0" ++QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT ++ ++_run_test() ++{ ++ IMGOPTSSYNTAX=true ++ OLD_TEST_IMG="$TEST_IMG" ++ TEST_IMG="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec0" ++ QEMU_IMG_EXTRA_ARGS="--image-opts --object $SECRET" ++ ++ echo ++ echo "== cluster size $csize" ++ echo "== checking image refcounts ==" ++ _check_test_img ++ ++ echo ++ echo "== writing some data ==" ++ $QEMU_IO -c "write -P 0x9 0 1" $QEMU_IMG_EXTRA_ARGS $TEST_IMG | _filter_qemu_io | _filter_testdir ++ echo ++ echo "== rechecking image refcounts ==" ++ _check_test_img ++ ++ echo ++ echo "== writing some more data ==" ++ $QEMU_IO -c "write -P 0x9 $csize 1" $QEMU_IMG_EXTRA_ARGS $TEST_IMG | _filter_qemu_io | _filter_testdir ++ echo ++ echo "== rechecking image refcounts ==" ++ _check_test_img ++ ++ TEST_IMG="$OLD_TEST_IMG" ++ QEMU_IMG_EXTRA_ARGS= ++ IMGOPTSSYNTAX= ++} ++ ++ ++echo ++echo "testing LUKS qcow2 encryption" ++echo ++ ++for csize in 512 2048 32768 ++do ++ _make_test_img --object $SECRET -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10,cluster_size=$csize" $size ++ _run_test ++ _cleanup_test_img ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/284.out b/tests/qemu-iotests/284.out +new file mode 100644 +index 0000000..48216f5 +--- /dev/null ++++ b/tests/qemu-iotests/284.out +@@ -0,0 +1,62 @@ ++QA output created by 284 ++ ++testing LUKS qcow2 encryption ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 ++ ++== cluster size 512 ++== checking image refcounts == ++No errors were found on the image. 
++ ++== writing some data == ++wrote 1/1 bytes at offset 0 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. ++ ++== writing some more data == ++wrote 1/1 bytes at offset 512 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 ++ ++== cluster size 2048 ++== checking image refcounts == ++No errors were found on the image. ++ ++== writing some data == ++wrote 1/1 bytes at offset 0 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. ++ ++== writing some more data == ++wrote 1/1 bytes at offset 2048 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 ++ ++== cluster size 32768 ++== checking image refcounts == ++No errors were found on the image. ++ ++== writing some data == ++wrote 1/1 bytes at offset 0 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. ++ ++== writing some more data == ++wrote 1/1 bytes at offset 32768 ++1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== rechecking image refcounts == ++No errors were found on the image. 
++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index e47cbfc..9c565cf 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -289,3 +289,4 @@ + 277 rw quick + 280 rw migration quick + 281 rw quick ++284 rw +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch b/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch new file mode 100644 index 0000000..5b212fc --- /dev/null +++ b/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch @@ -0,0 +1,294 @@ +From 07a93e74efa4861f54dd3d4bec01885f7af2fee3 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 17:01:32 +0200 +Subject: [PATCH 04/17] block-backend: Add flags to blk_truncate() + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-4-kwolf@redhat.com> +Patchwork-id: 97450 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 03/11] block-backend: Add flags to blk_truncate() +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +Now that node level interface bdrv_truncate() supports passing request +flags to the block driver, expose this on the BlockBackend level, too. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Max Reitz +Message-Id: <20200424125448.63318-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8c6242b6f383e43fd11d2c50f8bcdd2bba1100fc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + block/block-backend.c | 4 ++-- + block/commit.c | 4 ++-- + block/crypto.c | 2 +- + block/mirror.c | 2 +- + block/qcow2.c | 4 ++-- + block/qed.c | 2 +- + block/vdi.c | 2 +- + block/vhdx.c | 4 ++-- + block/vmdk.c | 6 +++--- + block/vpc.c | 2 +- + blockdev.c | 2 +- + include/sysemu/block-backend.h | 2 +- + qemu-img.c | 2 +- + qemu-io-cmds.c | 2 +- + 15 files changed, 22 insertions(+), 21 deletions(-) + +diff --git a/block.c b/block.c +index d6a05da..12c8941 100644 +--- a/block.c ++++ b/block.c +@@ -547,7 +547,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, + int64_t size; + int ret; + +- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); ++ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, ++ &local_err); + if (ret < 0 && ret != -ENOTSUP) { + error_propagate(errp, local_err); + return ret; +diff --git a/block/block-backend.c b/block/block-backend.c +index 8be2006..17ed6d8 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2137,14 +2137,14 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + } + + int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) + { + if (!blk_is_available(blk)) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + +- return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); ++ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); + } + + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, +diff --git a/block/commit.c b/block/commit.c +index 23c90b3..075ebf8 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -155,7 +155,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) + } + + if (base_len < len) { +- ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); ++ ret = blk_truncate(s->base, len, false, 
PREALLOC_MODE_OFF, 0, NULL); + if (ret) { + goto out; + } +@@ -471,7 +471,7 @@ int bdrv_commit(BlockDriverState *bs) + * grow the backing file image if possible. If not possible, + * we must return an error */ + if (length > backing_length) { +- ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, ++ ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, + &local_err); + if (ret < 0) { + error_report_err(local_err); +diff --git a/block/crypto.c b/block/crypto.c +index fcb4a97..83a8fc0 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -115,7 +115,7 @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, + * which will be used by the crypto header + */ + return blk_truncate(data->blk, data->size + headerlen, false, +- data->prealloc, errp); ++ data->prealloc, 0, errp); + } + + +diff --git a/block/mirror.c b/block/mirror.c +index 0d32fca..c8028cd 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -886,7 +886,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->base == blk_bs(s->target)) { + if (s->bdev_length > target_length) { + ret = blk_truncate(s->target, s->bdev_length, false, +- PREALLOC_MODE_OFF, NULL); ++ PREALLOC_MODE_OFF, 0, NULL); + if (ret < 0) { + goto immediate_exit; + } +diff --git a/block/qcow2.c b/block/qcow2.c +index c0fdcb9..86aa74a 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -3497,7 +3497,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + + /* Okay, now that we have a valid image, let's give it the right size */ + ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, +- errp); ++ 0, errp); + if (ret < 0) { + error_prepend(errp, "Could not resize image: "); + goto out; +@@ -5347,7 +5347,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, + * Amending image options should ensure that the image has + * exactly the given new values, so pass exact=true here. 
+ */ +- ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); + blk_unref(blk); + if (ret < 0) { + return ret; +diff --git a/block/qed.c b/block/qed.c +index fb6100b..b0fdb8f 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -677,7 +677,7 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, + * The QED format associates file length with allocation status, + * so a new file (which is empty) must have a length of 0. + */ +- ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto out; + } +diff --git a/block/vdi.c b/block/vdi.c +index e1a11f2..0c7835a 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -875,7 +875,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, + + if (image_type == VDI_TYPE_STATIC) { + ret = blk_truncate(blk, offset + blocks * block_size, false, +- PREALLOC_MODE_OFF, errp); ++ PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + error_prepend(errp, "Failed to statically allocate file"); + goto exit; +diff --git a/block/vhdx.c b/block/vhdx.c +index 5dfbb20..21497f7 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -1703,13 +1703,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, + /* All zeroes, so we can just extend the file - the end of the BAT + * is the furthest thing we have written yet */ + ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, +- errp); ++ 0, errp); + if (ret < 0) { + goto exit; + } + } else if (type == VHDX_TYPE_FIXED) { + ret = blk_truncate(blk, data_file_offset + image_size, false, +- PREALLOC_MODE_OFF, errp); ++ PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } +diff --git a/block/vmdk.c b/block/vmdk.c +index 1bbf937..1bd3991 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2118,7 +2118,7 @@ static int vmdk_init_extent(BlockBackend *blk, + int 
gd_buf_size; + + if (flat) { +- ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); + goto exit; + } + magic = cpu_to_be32(VMDK4_MAGIC); +@@ -2182,7 +2182,7 @@ static int vmdk_init_extent(BlockBackend *blk, + } + + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, +- PREALLOC_MODE_OFF, errp); ++ PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } +@@ -2523,7 +2523,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, + /* bdrv_pwrite write padding zeros to align to sector, we don't need that + * for description file */ + if (desc_offset == 0) { +- ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + goto exit; + } +diff --git a/block/vpc.c b/block/vpc.c +index 6df75e2..d5e7dc8 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -898,7 +898,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, + /* Add footer to total size */ + total_size += HEADER_SIZE; + +- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); + if (ret < 0) { + return ret; + } +diff --git a/blockdev.c b/blockdev.c +index 5128c9b..6dde52a 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3055,7 +3055,7 @@ void qmp_block_resize(bool has_device, const char *device, + } + + bdrv_drained_begin(bs); +- ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); ++ ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); + bdrv_drained_end(bs); + + out: +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 9bbdbd6..34de7fa 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -237,7 +237,7 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int blk_pwrite_compressed(BlockBackend 
*blk, int64_t offset, const void *buf, + int bytes); + int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, +- PreallocMode prealloc, Error **errp); ++ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +diff --git a/qemu-img.c b/qemu-img.c +index 6dc881b..a27ad70 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -3939,7 +3939,7 @@ static int img_resize(int argc, char **argv) + * resizing, so pass @exact=true. It is of no use to report + * success when the image has not actually been resized. + */ +- ret = blk_truncate(blk, total_size, true, prealloc, &err); ++ ret = blk_truncate(blk, total_size, true, prealloc, 0, &err); + if (!ret) { + qprintf(quiet, "Image resized.\n"); + } else { +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index 1b7e700..851f07e 100644 +--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -1715,7 +1715,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) + * exact=true. It is better to err on the "emit more errors" side + * than to be overly permissive. 
+ */ +- ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); ++ ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch new file mode 100644 index 0000000..9d49cfa --- /dev/null +++ b/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch @@ -0,0 +1,158 @@ +From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:15 +0100 +Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function + definitions + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-5-kwolf@redhat.com> +Patchwork-id: 94598 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Move all variants of the flush/pdiscard functions to a single place and +put the blk_co_*() version first because it is called by all other +variants (and will become static in the next patch). + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- + 1 file changed, 46 insertions(+), 46 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 8b8f2a8..17b2e87 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + blk_aio_write_entry, flags, cb, opaque); + } + +-static void blk_aio_flush_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_flush(rwco->blk); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_flush(BlockBackend *blk, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +-} +- +-static void blk_aio_pdiscard_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, +- int64_t offset, int bytes, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, +- cb, opaque); +-} +- + void blk_aio_cancel(BlockAIOCB *acb) + { + bdrv_aio_cancel(acb); +@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return bdrv_co_pdiscard(blk->root, offset, bytes); + } + ++static void blk_aio_pdiscard_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, ++ int64_t offset, int bytes, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, ++ cb, opaque); ++} ++ ++static void blk_pdiscard_entry(void *opaque) ++{ ++ BlkRwCo *rwco = opaque; ++ 
QEMUIOVector *qiov = rwco->iobuf; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ aio_wait_kick(); ++} ++ ++int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); ++} ++ + int blk_co_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); +@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) + return bdrv_co_flush(blk_bs(blk)); + } + ++static void blk_aio_flush_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_flush(rwco->blk); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_flush(BlockBackend *blk, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + return bdrv_truncate(blk->root, offset, exact, prealloc, errp); + } + +-static void blk_pdiscard_entry(void *opaque) +-{ +- BlkRwCo *rwco = opaque; +- QEMUIOVector *qiov = rwco->iobuf; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); +- aio_wait_kick(); +-} +- +-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +-{ +- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); +-} +- + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size) + { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch new file mode 100644 index 0000000..45f506c --- /dev/null +++ b/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch @@ -0,0 +1,130 @@ +From aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:46 +0000 +Subject: [PATCH 13/18] 
block/backup-top: Don't acquire context while dropping + top + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-7-slp@redhat.com> +Patchwork-id: 93759 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +All paths that lead to bdrv_backup_top_drop(), except for the call +from backup_clean(), imply that the BDS AioContext has already been +acquired, so doing it there too can potentially lead to QEMU hanging +on AIO_WAIT_WHILE(). + +An easy way to trigger this situation is by issuing a two actions +transaction, with a proper and a bogus blockdev-backup, so the second +one will trigger a rollback. This will trigger a hang with an stack +trace like this one: + + #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, + timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 + #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) + at /usr/include/bits/poll2.h:77 + #2 0x000055e743386e09 in qemu_poll_ns + (fds=, nfds=, timeout=) at util/qemu-timer.c:336 + #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) + at util/aio-posix.c:669 + #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 + #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 + #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 + #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 + #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 + #9 0x000055e74331461f in backup_job_create + (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, 
sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 + #10 0x000055e74315bc52 in do_backup_common + (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) + at blockdev.c:3580 + #11 0x000055e74315c37c in do_blockdev_backup + (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) + at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 + #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, errp=0x7ffddfd1f018) + at blockdev.c:1885 + #13 0x000055e743160152 in qmp_transaction + (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 + #14 0x000055e743287ff5 in qmp_marshal_transaction + (args=, ret=, errp=0x7ffddfd1f0f8) + at qapi/qapi-commands-transaction.c:44 + #15 0x000055e74333de6c in do_qmp_dispatch + (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 + #16 0x000055e74333de6c in qmp_dispatch + (cmds=0x55e743c28d60 , request=, allow_oob=) + at qapi/qmp-dispatch.c:175 + #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) + at monitor/qmp.c:145 + #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 + #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 + #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 + #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 + #22 0x000055e743385742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #23 
0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 + #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 + #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 + #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 + #29 0x000055e743016a72 in main (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by not acquiring the AioContext there, and ensuring all paths +leading to it have it already acquired (backup_clean()). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + block/backup-top.c | 5 ----- + block/backup.c | 3 +++ + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 818d3f2..b8d863f 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -255,9 +255,6 @@ append_failed: + void bdrv_backup_top_drop(BlockDriverState *bs) + { + BDRVBackupTopState *s = bs->opaque; +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + +@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + bdrv_drained_end(bs); + + bdrv_unref(bs); +- +- aio_context_release(aio_context); + } +diff --git a/block/backup.c b/block/backup.c +index cf62b1a..1383e21 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,8 +135,11 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); ++ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); + ++ aio_context_acquire(aio_context); + 
bdrv_backup_top_drop(s->backup_top); ++ aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch new file mode 100644 index 0000000..745be9f --- /dev/null +++ b/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch @@ -0,0 +1,114 @@ +From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:31 +0000 +Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different + AioContext + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-6-kwolf@redhat.com> +Patchwork-id: 94282 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP +command) to attach a node as the new backing file even if the node is in +a different AioContext than the parent if one of both nodes can be moved +to the AioContext of the other node. + +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-3-kwolf@redhat.com> +Reviewed-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 32 ++++++++++++++++++++++++++------ + tests/qemu-iotests/245 | 8 +++----- + 2 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index a744bb5..39e4647 100644 +--- a/block.c ++++ b/block.c +@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, + *shared = cumulative_shared_perms; + } + ++static bool bdrv_reopen_can_attach(BlockDriverState *parent, ++ BdrvChild *child, ++ BlockDriverState *new_child, ++ Error **errp) ++{ ++ AioContext *parent_ctx = bdrv_get_aio_context(parent); ++ AioContext *child_ctx = bdrv_get_aio_context(new_child); ++ GSList *ignore; ++ bool ret; ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); ++ g_slist_free(ignore); ++ if (ret) { ++ return ret; ++ } ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); ++ g_slist_free(ignore); ++ return ret; ++} ++ + /* + * Take a BDRVReopenState and check if the value of 'backing' in the + * reopen_state->options QDict is valid or not. +@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, + } + + /* +- * TODO: before removing the x- prefix from x-blockdev-reopen we +- * should move the new backing file into the right AioContext +- * instead of returning an error. ++ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in ++ * bdrv_reopen_commit() won't fail. 
+ */ + if (new_backing_bs) { +- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { +- error_setg(errp, "Cannot use a new backing file " +- "with a different AioContext"); ++ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + return -EINVAL; + } + } +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index f69c2fa..919131d 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): + # neither of them can switch to the other AioContext + def test_iothreads_error(self): + self.run_test_iothreads('iothread0', 'iothread1', +- "Cannot use a new backing file with a different AioContext") ++ "Cannot change iothread of active block backend") + + def test_iothreads_compatible_users(self): + self.run_test_iothreads('iothread0', 'iothread0') + + def test_iothreads_switch_backing(self): +- self.run_test_iothreads('iothread0', None, +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads('iothread0', None) + + def test_iothreads_switch_overlay(self): +- self.run_test_iothreads(None, 'iothread0', +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads(None, 'iothread0') + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch b/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch new file mode 100644 index 0000000..a974a18 --- /dev/null +++ b/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch @@ -0,0 +1,55 @@ +From 5e5ca17e1e09cfe9a780c556528bbde23c93fc4e Mon Sep 17 00:00:00 2001 +From: Richard Jones +Date: Thu, 28 May 2020 14:27:37 +0100 +Subject: [PATCH 03/26] block/curl: HTTP header field names are case + insensitive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Richard Jones +Message-id: <20200528142737.17318-3-rjones@redhat.com> +Patchwork-id: 96895 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block/curl: HTTP header field names are case insensitive +Bugzilla: 1841038 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Philippe Mathieu-Daudé + +From: David Edmondson + +RFC 7230 section 3.2 indicates that HTTP header field names are case +insensitive. + +Signed-off-by: David Edmondson +Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com> +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 69032253c33ae1774233c63cedf36d32242a85fc) +Signed-off-by: Danilo C. L. de Paula +--- + block/curl.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/curl.c b/block/curl.c +index f9ffb7f..6e32590 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -216,11 +216,12 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) + size_t realsize = size * nmemb; + const char *header = (char *)ptr; + const char *end = header + realsize; +- const char *accept_ranges = "Accept-Ranges:"; ++ const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen(accept_ranges) +- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { ++ && g_ascii_strncasecmp(header, accept_ranges, ++ strlen(accept_ranges)) == 0) { + + char *p = strchr(header, ':') + 1; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch b/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch new file mode 100644 index 0000000..c09a1e2 --- /dev/null +++ b/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch @@ -0,0 +1,76 @@ +From e5ac775de83d3d22f13c74ab198780b8b579f684 Mon Sep 17 00:00:00 2001 +From: Richard Jones +Date: Thu, 28 May 2020 14:27:36 +0100 +Subject: [PATCH 02/26] block/curl: HTTP header fields allow whitespace around + values + +RH-Author: 
Richard Jones +Message-id: <20200528142737.17318-2-rjones@redhat.com> +Patchwork-id: 96894 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] block/curl: HTTP header fields allow whitespace around values +Bugzilla: 1841038 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +From: David Edmondson + +RFC 7230 section 3.2 indicates that whitespace is permitted between +the field name and field value and after the field value. + +Signed-off-by: David Edmondson +Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com> +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 7788a319399f17476ff1dd43164c869e320820a2) +Signed-off-by: Danilo C. L. de Paula +--- + block/curl.c | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/block/curl.c b/block/curl.c +index f862993..f9ffb7f 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -214,11 +214,34 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) + { + BDRVCURLState *s = opaque; + size_t realsize = size * nmemb; +- const char *accept_line = "Accept-Ranges: bytes"; ++ const char *header = (char *)ptr; ++ const char *end = header + realsize; ++ const char *accept_ranges = "Accept-Ranges:"; ++ const char *bytes = "bytes"; + +- if (realsize >= strlen(accept_line) +- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) { +- s->accept_range = true; ++ if (realsize >= strlen(accept_ranges) ++ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { ++ ++ char *p = strchr(header, ':') + 1; ++ ++ /* Skip whitespace between the header name and value. */ ++ while (p < end && *p && g_ascii_isspace(*p)) { ++ p++; ++ } ++ ++ if (end - p >= strlen(bytes) ++ && strncmp(p, bytes, strlen(bytes)) == 0) { ++ ++ /* Check that there is nothing but whitespace after the value. 
*/ ++ p += strlen(bytes); ++ while (p < end && *p && g_ascii_isspace(*p)) { ++ p++; ++ } ++ ++ if (p == end || !*p) { ++ s->accept_range = true; ++ } ++ } + } + + return realsize; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch b/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch new file mode 100644 index 0000000..9d5e659 --- /dev/null +++ b/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch @@ -0,0 +1,99 @@ +From 9581770f48911cbe68cfa1a7fa125df2a0a27d02 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Sun, 31 May 2020 16:40:33 +0100 +Subject: [PATCH 5/7] block: introducing 'bdrv_co_delete_file' interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Maxim Levitsky +Message-id: <20200531164035.34188-2-mlevitsk@redhat.com> +Patchwork-id: 97057 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/3] block: introducing 'bdrv_co_delete_file' interface +Bugzilla: 1827630 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: John Snow +RH-Acked-by: Eric Blake + +From: Daniel Henrique Barboza + +Adding to Block Drivers the capability of being able to clean up +its created files can be useful in certain situations. For the +LUKS driver, for instance, a failure in one of its authentication +steps can leave files in the host that weren't there before. + +This patch adds the 'bdrv_co_delete_file' interface to block +drivers and add it to the 'file' driver in file-posix.c. The +implementation is given by 'raw_co_delete_file'. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Daniel Henrique Barboza +Message-Id: <20200130213907.2830642-2-danielhb413@gmail.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9bffae14df879255329473a7bd578643af2d4c9c) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block/file-posix.c | 23 +++++++++++++++++++++++ + include/block/block_int.h | 4 ++++ + 2 files changed, 27 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index dd18d40..1609598 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2388,6 +2388,28 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, + return raw_co_create(&options, errp); + } + ++static int coroutine_fn raw_co_delete_file(BlockDriverState *bs, ++ Error **errp) ++{ ++ struct stat st; ++ int ret; ++ ++ if (!(stat(bs->filename, &st) == 0) || !S_ISREG(st.st_mode)) { ++ error_setg_errno(errp, ENOENT, "%s is not a regular file", ++ bs->filename); ++ return -ENOENT; ++ } ++ ++ ret = unlink(bs->filename); ++ if (ret < 0) { ++ ret = -errno; ++ error_setg_errno(errp, -ret, "Error when deleting file %s", ++ bs->filename); ++ } ++ ++ return ret; ++} ++ + /* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. +@@ -3019,6 +3041,7 @@ BlockDriver bdrv_file = { + .bdrv_co_block_status = raw_co_block_status, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, ++ .bdrv_co_delete_file = raw_co_delete_file, + + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 529f153..562dca1 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -316,6 +316,10 @@ struct BlockDriver { + */ + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); + ++ /* Delete a created file. */ ++ int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, ++ Error **errp); ++ + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example file-posix.c calls fsync()). 
+-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch new file mode 100644 index 0000000..378ae1a --- /dev/null +++ b/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch @@ -0,0 +1,78 @@ +From 4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:42 +0000 +Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> +Patchwork-id: 94224 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in .bdrv_close() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +When nbd_close() is called from a coroutine, the connection_co never +gets to run, and thus nbd_teardown_connection() hangs. + +This is because aio_co_enter() only puts the connection_co into the main +coroutine's wake-up queue, so this main coroutine needs to yield and +wait for connection_co to terminate. + +Suggested-by: Kevin Wolf +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-2-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block/nbd.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 5f18f78..a73f0d9 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { + CoMutex send_mutex; + CoQueue free_sema; + Coroutine *connection_co; ++ Coroutine *teardown_co; + QemuCoSleepState *connection_co_sleep_ns_state; + bool drained; + bool wait_drained_end; +@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); + } + } +- BDRV_POLL_WHILE(bs, s->connection_co); ++ if (qemu_in_coroutine()) { ++ s->teardown_co = qemu_coroutine_self(); ++ /* connection_co resumes us when it terminates */ ++ qemu_coroutine_yield(); ++ s->teardown_co = NULL; ++ } else { ++ BDRV_POLL_WHILE(bs, s->connection_co); ++ } ++ assert(!s->connection_co); + } + + static bool nbd_client_connecting(BDRVNBDState *s) +@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) + s->ioc = NULL; + } + ++ if (s->teardown_co) { ++ aio_co_wake(s->teardown_co); ++ } + aio_wait_kick(); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch new file mode 100644 index 0000000..43f9ffc --- /dev/null +++ b/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch @@ -0,0 +1,328 @@ +From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:06 +0000 +Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> +Patchwork-id: 94447 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + 
+This will allow the reuse of a single generic .bdrv_co_create +implementation for several drivers. +No functional changes. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +Signed-off-by: Max Reitz +(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 3 ++- + block/crypto.c | 3 ++- + block/file-posix.c | 4 +++- + block/file-win32.c | 4 +++- + block/gluster.c | 3 ++- + block/nfs.c | 4 +++- + block/parallels.c | 3 ++- + block/qcow.c | 3 ++- + block/qcow2.c | 4 +++- + block/qed.c | 3 ++- + block/raw-format.c | 4 +++- + block/rbd.c | 3 ++- + block/sheepdog.c | 4 +++- + block/ssh.c | 4 +++- + block/vdi.c | 4 +++- + block/vhdx.c | 3 ++- + block/vmdk.c | 4 +++- + block/vpc.c | 6 ++++-- + include/block/block_int.h | 3 ++- + 19 files changed, 49 insertions(+), 20 deletions(-) + +diff --git a/block.c b/block.c +index ec29b1e..f9a1c5b 100644 +--- a/block.c ++++ b/block.c +@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) + CreateCo *cco = opaque; + assert(cco->drv); + +- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); ++ ret = cco->drv->bdrv_co_create_opts(cco->drv, ++ cco->filename, cco->opts, &local_err); + error_propagate(&cco->err, local_err); + cco->ret = ret; + } +diff --git a/block/crypto.c b/block/crypto.c +index 2482383..970d463 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -539,7 +539,8 @@ fail: + return ret; + } + +-static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, ++static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/file-posix.c b/block/file-posix.c +index fd29372..a2e0a74 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2346,7 +2346,9 @@ out: + return result; + } + +-static int 
coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/file-win32.c b/block/file-win32.c +index 77e8ff7..1585983 100644 +--- a/block/file-win32.c ++++ b/block/file-win32.c +@@ -588,7 +588,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) + return 0; + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/gluster.c b/block/gluster.c +index 4fa4a77..0aa1f2c 100644 +--- a/block/gluster.c ++++ b/block/gluster.c +@@ -1130,7 +1130,8 @@ out: + return ret; + } + +-static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, ++static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/nfs.c b/block/nfs.c +index 9a6311e..cc2413d 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -662,7 +662,9 @@ out: + return ret; + } + +-static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, ++static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, ++ const char *url, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/parallels.c b/block/parallels.c +index 7a01997..6d4ed77 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -609,7 +609,8 @@ exit: + goto out; + } + +-static int coroutine_fn parallels_co_create_opts(const char *filename, ++static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/qcow.c b/block/qcow.c +index fce8989..8973e4e 100644 +--- a/block/qcow.c ++++ 
b/block/qcow.c +@@ -934,7 +934,8 @@ exit: + return ret; + } + +-static int coroutine_fn qcow_co_create_opts(const char *filename, ++static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qcow2.c b/block/qcow2.c +index 83b1fc0..71067c6 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -3558,7 +3558,9 @@ out: + return ret; + } + +-static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qed.c b/block/qed.c +index d8c4e5f..1af9b3c 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -720,7 +720,8 @@ out: + return ret; + } + +-static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, ++static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/raw-format.c b/block/raw-format.c +index 3a76ec7..93b25e1 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) + return bdrv_has_zero_init_truncate(bs->file->bs); + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + return bdrv_create_file(filename, opts, errp); +diff --git a/block/rbd.c b/block/rbd.c +index 027cbcc..8847259 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) + return qemu_rbd_do_create(options, NULL, NULL, errp); + } + +-static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, ++static int coroutine_fn 
qemu_rbd_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/sheepdog.c b/block/sheepdog.c +index cfa8433..a8a7e32 100644 +--- a/block/sheepdog.c ++++ b/block/sheepdog.c +@@ -2157,7 +2157,9 @@ out: + return ret; + } + +-static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn sd_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/ssh.c b/block/ssh.c +index b4375cf..84e9282 100644 +--- a/block/ssh.c ++++ b/block/ssh.c +@@ -963,7 +963,9 @@ fail: + return ret; + } + +-static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/vdi.c b/block/vdi.c +index 0142da7..e1a11f2 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, + return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); + } + +-static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + QDict *qdict = NULL; +diff --git a/block/vhdx.c b/block/vhdx.c +index f02d261..33e57cd 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -2046,7 +2046,8 @@ delete_and_exit: + return ret; + } + +-static int coroutine_fn vhdx_co_create_opts(const char *filename, ++static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/vmdk.c b/block/vmdk.c +index 20e909d..eb726f2 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2588,7 +2588,9 @@ exit: + return blk; + } + 
+-static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + Error *local_err = NULL; +diff --git a/block/vpc.c b/block/vpc.c +index a655502..6df75e2 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1089,8 +1089,10 @@ out: + return ret; + } + +-static int coroutine_fn vpc_co_create_opts(const char *filename, +- QemuOpts *opts, Error **errp) ++static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 96e327b..7ff81be 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -136,7 +136,8 @@ struct BlockDriver { + void (*bdrv_close)(BlockDriverState *bs); + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); +- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, ++ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp); + int (*bdrv_make_empty)(BlockDriverState *bs); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch b/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch new file mode 100644 index 0000000..2c27fd2 --- /dev/null +++ b/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch @@ -0,0 +1,78 @@ +From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:29 +0000 +Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into + bdrv_reopen_commit_post + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-4-kwolf@redhat.com> +Patchwork-id: 94280 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into 
bdrv_reopen_commit_post +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +The bitmap code requires writing the 'file' child when the qcow2 driver +is reopened in read-write mode. + +If the 'file' child is being reopened due to a permissions change, the +modification is commited yet when qcow2_reopen_commit is called. This +means that any attempt to write the 'file' child will end with EBADFD +as the original fd was already closed. + +Moving bitmap reopening to the new callback which is called after +permission modifications are commited fixes this as the file descriptor +will be replaced with the correct one. + +The above problem manifests itself when reopening 'qcow2' format layer +which uses a 'file-posix' file child which was opened with the +'auto-read-only' property set. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 7c18721..83b1fc0 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1881,6 +1881,11 @@ fail: + static void qcow2_reopen_commit(BDRVReopenState *state) + { + qcow2_update_options_commit(state->bs, state->opaque); ++ g_free(state->opaque); ++} ++ ++static void qcow2_reopen_commit_post(BDRVReopenState *state) ++{ + if (state->flags & BDRV_O_RDWR) { + Error *local_err = NULL; + +@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) + bdrv_get_node_name(state->bs)); + } + } +- g_free(state->opaque); + } + + static void qcow2_reopen_abort(BDRVReopenState *state) +@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { + .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, ++ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, + .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch new file mode 100644 index 0000000..5ba1521 --- /dev/null +++ b/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch @@ -0,0 +1,296 @@ +From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:07 +0000 +Subject: [PATCH 2/4] block: trickle down the fallback image creation function + use to the block drivers + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> +Patchwork-id: 94446 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + 
+Instead of checking the .bdrv_co_create_opts to see if we need the +fallback, just implement the .bdrv_co_create_opts in the drivers that +need it. + +This way we don't break various places that need to know if the +underlying protocol/format really supports image creation, and this way +we still allow some drivers to not support image creation. + +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 + +Note that technically this driver reverts the image creation fallback +for the vxhs driver since I don't have a means to test it, and IMHO it +is better to leave it not supported as it was prior to generic image +creation patches. + +Also drop iscsi_create_opts which was left accidentally. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and + bdrv_create_opts_simple from block.h into block_int.h] +Signed-off-by: Max Reitz +(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) + +Contextual conflicts in block.c and include/block/block_int.h + +(conflict in block.c by default shows as functional but +with --diff-algorithm=patience it becomes a contextual conflict) + +... +001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' +002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' +... +002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' + +So now running: +meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) + +shows no contextual conflicts +It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c +Thanks to Max Reitz for helping me with this. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 35 ++++++++++++++++++++--------------- + block/file-posix.c | 7 ++++++- + block/iscsi.c | 16 ++++------------ + block/nbd.c | 6 ++++++ + block/nvme.c | 3 +++ + include/block/block.h | 1 + + include/block/block_int.h | 11 +++++++++++ + 7 files changed, 51 insertions(+), 28 deletions(-) + +diff --git a/block.c b/block.c +index f9a1c5b..ba3b40d7 100644 +--- a/block.c ++++ b/block.c +@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, + return 0; + } + +-static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, +- QemuOpts *opts, Error **errp) ++/** ++ * Simple implementation of bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockBackend *blk; + QDict *options; +@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) + return -ENOENT; + } + +- if (drv->bdrv_co_create_opts) { +- return bdrv_create(drv, filename, opts, errp); +- } else { +- return bdrv_create_file_fallback(filename, drv, opts, errp); +- } ++ return bdrv_create(drv, filename, opts, errp); + } + + /** +@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + +-static QemuOptsList fallback_create_opts = { +- .name = "fallback-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++QemuOptsList bdrv_create_opts_simple = { ++ .name = "simple-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, +@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- create_opts = qemu_opts_append(create_opts, drv->create_opts); +- if (proto_drv->create_opts) { +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); +- } 
else { +- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ if (!proto_drv->create_opts) { ++ error_setg(errp, "Protocol driver '%s' does not support image creation", ++ proto_drv->format_name); ++ return; + } + ++ create_opts = qemu_opts_append(create_opts, drv->create_opts); ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); +diff --git a/block/file-posix.c b/block/file-posix.c +index a2e0a74..dd18d40 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +- + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, +@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +diff --git 
a/block/iscsi.c b/block/iscsi.c +index b45da65..16b0716 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2399,18 +2399,6 @@ out_unlock: + return r; + } + +-static QemuOptsList iscsi_create_opts = { +- .name = "iscsi-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), +- .desc = { +- { +- .name = BLOCK_OPT_SIZE, +- .type = QEMU_OPT_SIZE, +- .help = "Virtual disk size" +- }, +- { /* end of list */ } +- } +-}; + + static const char *const iscsi_strong_runtime_opts[] = { + "transport", +@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +diff --git a/block/nbd.c b/block/nbd.c +index a73f0d9..927915d 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = 
nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { + .protocol_name = "nbd+unix", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +diff --git a/block/nvme.c b/block/nvme.c +index d41c4bd..7b7c0cc 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { + .protocol_name = "nvme", + .instance_size = sizeof(BDRVNVMeState), + ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, ++ + .bdrv_parse_filename = nvme_parse_filename, + .bdrv_file_open = nvme_file_open, + .bdrv_close = nvme_close, +diff --git a/include/block/block.h b/include/block/block.h +index 1df9848..92685d2 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); + int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); + int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); ++ + BlockDriverState *bdrv_new(void); + void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 7ff81be..529f153 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + + int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + ++/** ++ * Simple implementation of 
bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp); ++extern QemuOptsList bdrv_create_opts_simple; ++ + #endif /* BLOCK_INT_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch b/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch new file mode 100644 index 0000000..114e1b7 --- /dev/null +++ b/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch @@ -0,0 +1,94 @@ +From d84b9b93755ece6618ed98fa84386beeb1a0e40b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:36 +0100 +Subject: [PATCH 08/17] block: truncate: Don't make backing file data visible + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-8-kwolf@redhat.com> +Patchwork-id: 97454 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 07/11] block: truncate: Don't make backing file data visible +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +When extending the size of an image that has a backing file larger than +its old size, make sure that the backing file data doesn't become +visible in the guest, but the added area is properly zeroed out. + +Consider the following scenario where the overlay is shorter than its +backing file: + + base.qcow2: AAAAAAAA + overlay.qcow2: BBBB + +When resizing (extending) overlay.qcow2, the new blocks should not stay +unallocated and make the additional As from base.qcow2 visible like +before this patch, but zeros should be read. 
+ +A similar case happens with the various variants of a commit job when an +intermediate file is short (- for unallocated): + + base.qcow2: A-A-AAAA + mid.qcow2: BB-B + top.qcow2: C--C--C- + +After commit top.qcow2 to mid.qcow2, the following happens: + + mid.qcow2: CB-C00C0 (correct result) + mid.qcow2: CB-C--C- (before this fix) + +Without the fix, blocks that previously read as zeros on top.qcow2 +suddenly turn into A. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200424125448.63318-8-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 955c7d6687fefcd903900a1e597fcbc896c661cd) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/io.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/block/io.c b/block/io.c +index 3235ce5..6c70b56 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -3370,6 +3370,31 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + goto out; + } + ++ /* ++ * If the image has a backing file that is large enough that it would ++ * provide data for the new area, we cannot leave it unallocated because ++ * then the backing file content would become visible. Instead, zero-fill ++ * the new area. ++ * ++ * Note that if the image has a backing file, but was opened without the ++ * backing file, taking care of keeping things consistent with that backing ++ * file is the user's responsibility. 
++ */ ++ if (new_bytes && bs->backing) { ++ int64_t backing_len; ++ ++ backing_len = bdrv_getlength(backing_bs(bs)); ++ if (backing_len < 0) { ++ ret = backing_len; ++ error_setg_errno(errp, -ret, "Could not get backing file size"); ++ goto out; ++ } ++ ++ if (backing_len > old_size) { ++ flags |= BDRV_REQ_ZERO_WRITE; ++ } ++ } ++ + if (drv->bdrv_co_truncate) { + if (flags & ~bs->supported_truncate_flags) { + error_setg(errp, "Block driver does not support requested flags"); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch b/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch new file mode 100644 index 0000000..91c3cd1 --- /dev/null +++ b/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch @@ -0,0 +1,92 @@ +From 23b92512d7f11b3a38cf24a5c2fe7848f1e550e8 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Sun, 31 May 2020 16:40:34 +0100 +Subject: [PATCH 6/7] block.c: adding bdrv_co_delete_file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Maxim Levitsky +Message-id: <20200531164035.34188-3-mlevitsk@redhat.com> +Patchwork-id: 97058 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/3] block.c: adding bdrv_co_delete_file +Bugzilla: 1827630 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: John Snow +RH-Acked-by: Eric Blake + +From: Daniel Henrique Barboza + +Using the new 'bdrv_co_delete_file' interface, a pure co_routine function +'bdrv_co_delete_file' inside block.c can can be used in a way similar of +the existing bdrv_create_file to to clean up a created file. + +We're creating a pure co_routine because the only caller of +'bdrv_co_delete_file' will be already in co_routine context, thus there +is no need to add all the machinery to check for qemu_in_coroutine() and +create a separated co_routine to do the job. + +Suggested-by: Daniel P. 
Berrangé +Signed-off-by: Daniel Henrique Barboza +Message-Id: <20200130213907.2830642-3-danielhb413@gmail.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e1d7f8bb1ec0c6911dcea81641ce6139dbded02d) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 26 ++++++++++++++++++++++++++ + include/block/block.h | 1 + + 2 files changed, 27 insertions(+) + +diff --git a/block.c b/block.c +index ba3b40d7..d6a05da 100644 +--- a/block.c ++++ b/block.c +@@ -672,6 +672,32 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) + return bdrv_create(drv, filename, opts, errp); + } + ++int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) ++{ ++ Error *local_err = NULL; ++ int ret; ++ ++ assert(bs != NULL); ++ ++ if (!bs->drv) { ++ error_setg(errp, "Block node '%s' is not opened", bs->filename); ++ return -ENOMEDIUM; ++ } ++ ++ if (!bs->drv->bdrv_co_delete_file) { ++ error_setg(errp, "Driver '%s' does not support image deletion", ++ bs->drv->format_name); ++ return -ENOTSUP; ++ } ++ ++ ret = bs->drv->bdrv_co_delete_file(bs, &local_err); ++ if (ret < 0) { ++ error_propagate(errp, local_err); ++ } ++ ++ return ret; ++} ++ + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz struct and return 0. 
+diff --git a/include/block/block.h b/include/block/block.h +index 92685d2..b2a3074 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -373,6 +373,7 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp); + void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); ++int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); + + + typedef struct BdrvCheckResult { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch new file mode 100644 index 0000000..9a69130 --- /dev/null +++ b/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch @@ -0,0 +1,176 @@ +From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:47 +0000 +Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-8-slp@redhat.com> +Patchwork-id: 93760 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Dirty bitmap addition and removal functions are not acquiring the BDS +AioContext, while they may call code that expects it to be +acquired. 
+ +This may trigger a crash with a stack trace like this one: + + #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) + at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 + #2 0x0000565022294dce in error_exit + (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 + #3 0x00005650222950ba in qemu_mutex_unlock_impl + (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 + #4 0x0000565022290029 in aio_context_release + (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 + #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap + (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) + at block/dirty-bitmap.c:542 + #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 + #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 + #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add + (args=, ret=, errp=0x7fff22831798) + at qapi/qapi-commands-block-core.c:651 + #9 0x0000565022247e6c in do_qmp_dispatch + (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 + #10 0x0000565022247e6c in qmp_dispatch + (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #11 0x0000565022166061 in monitor_qmp_dispatch + (mon=0x56502450faa0, req=) at monitor/qmp.c:145 + #12 0x00005650221666fa in monitor_qmp_bh_dispatcher + (data=) at monitor/qmp.c:234 + #13 0x000056502228f866 in aio_bh_call (bh=0x56502440eae0) + 
 at util/async.c:117 + #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) + at util/async.c:117 + #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) + at util/aio-posix.c:459 + #16 0x000056502228f742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) + at gmain.c:3176 + #18 0x00007f0ef5ce667d in g_main_context_dispatch + (context=context@entry=0x56502449aa40) at gmain.c:3829 + #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 + #20 0x0000565022291d08 in os_host_main_loop_wait + (timeout=) at util/main-loop.c:242 + #21 0x0000565022291d08 in main_loop_wait (nonblocking=) + at util/main-loop.c:518 + #22 0x00005650220743c1 in main_loop () at vl.c:1828 + #23 0x0000565021f20a72 in main + (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() +and do_block_dirty_bitmap_remove(). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 1dacbc2..d4ef6cd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + return; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); +- return; ++ goto out; + } + } else { + /* Default to cluster size, if available: */ +@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { +- return; ++ goto out; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- return; ++ goto out; + } + + if (disabled) { +@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); ++ ++out: ++ aio_context_release(aio_context); + } + + static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( +@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return NULL; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { ++ aio_context_release(aio_context); + 
return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { +- return NULL; ++ aio_context_release(aio_context); ++ return NULL; + } + + if (release) { +@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + *bitmap_bs = bs; + } + ++ aio_context_release(aio_context); + return release ? NULL : bitmap; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch b/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch new file mode 100644 index 0000000..8cb1700 --- /dev/null +++ b/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch @@ -0,0 +1,179 @@ +From 0c8ba0a96a7d0cbf371f1a5fbee543e8b2cb2595 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:13 +0100 +Subject: [PATCH 08/26] blockdev: Promote several bitmap functions to + non-static +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-6-eblake@redhat.com> +Patchwork-id: 97077 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 05/12] blockdev: Promote several bitmap functions to non-static +Bugzilla: 1779893 1779904 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +The next patch will split blockdev.c, which will require accessing +some previously-static functions from more than one .c file. But part +of promoting a function to public is picking a naming scheme that does +not reek of exposing too many internals (two of the three functions +were named starting with 'do_'). To make future code motion easier, +perform the function rename and non-static promotion into its own +patch. 
+ +Signed-off-by: Eric Blake +Reviewed-by: Max Reitz +Message-Id: <20200513011648.166876-5-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit c6996cf9a6c759c29919642be9a73ac64b38301b) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 47 +++++++++++++++++++---------------------------- + include/block/block_int.h | 12 ++++++++++++ + 2 files changed, 31 insertions(+), 28 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 86eb115..3958058 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1262,10 +1262,10 @@ out_aio_context: + * + * @return: A bitmap object on success, or NULL on failure. + */ +-static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, +- const char *name, +- BlockDriverState **pbs, +- Error **errp) ++BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, ++ const char *name, ++ BlockDriverState **pbs, ++ Error **errp) + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +@@ -2241,11 +2241,6 @@ static void block_dirty_bitmap_disable_abort(BlkActionState *common) + } + } + +-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( +- const char *node, const char *target, +- BlockDirtyBitmapMergeSourceList *bitmaps, +- HBitmap **backup, Error **errp); +- + static void block_dirty_bitmap_merge_prepare(BlkActionState *common, + Error **errp) + { +@@ -2259,15 +2254,11 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common, + + action = common->action->u.block_dirty_bitmap_merge.data; + +- state->bitmap = do_block_dirty_bitmap_merge(action->node, action->target, +- action->bitmaps, &state->backup, +- errp); ++ state->bitmap = block_dirty_bitmap_merge(action->node, action->target, ++ action->bitmaps, &state->backup, ++ errp); + } + +-static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( +- const char *node, const char *name, bool release, +- BlockDriverState **bitmap_bs, Error **errp); +- + static void block_dirty_bitmap_remove_prepare(BlkActionState 
*common, + Error **errp) + { +@@ -2281,8 +2272,8 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common, + + action = common->action->u.block_dirty_bitmap_remove.data; + +- state->bitmap = do_block_dirty_bitmap_remove(action->node, action->name, +- false, &state->bs, errp); ++ state->bitmap = block_dirty_bitmap_remove(action->node, action->name, ++ false, &state->bs, errp); + if (state->bitmap) { + bdrv_dirty_bitmap_skip_store(state->bitmap, true); + bdrv_dirty_bitmap_set_busy(state->bitmap, true); +@@ -3046,9 +3037,10 @@ out: + aio_context_release(aio_context); + } + +-static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( +- const char *node, const char *name, bool release, +- BlockDriverState **bitmap_bs, Error **errp) ++BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, ++ bool release, ++ BlockDriverState **bitmap_bs, ++ Error **errp) + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +@@ -3090,7 +3082,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + void qmp_block_dirty_bitmap_remove(const char *node, const char *name, + Error **errp) + { +- do_block_dirty_bitmap_remove(node, name, true, NULL, errp); ++ block_dirty_bitmap_remove(node, name, true, NULL, errp); + } + + /** +@@ -3151,10 +3143,9 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name, + bdrv_disable_dirty_bitmap(bitmap); + } + +-static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( +- const char *node, const char *target, +- BlockDirtyBitmapMergeSourceList *bitmaps, +- HBitmap **backup, Error **errp) ++BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, ++ BlockDirtyBitmapMergeSourceList *bms, ++ HBitmap **backup, Error **errp) + { + BlockDriverState *bs; + BdrvDirtyBitmap *dst, *src, *anon; +@@ -3172,7 +3163,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( + return NULL; + } + +- for (lst = bitmaps; lst; lst = lst->next) { ++ for (lst = bms; lst; lst = lst->next) { + switch 
(lst->value->type) { + const char *name, *node; + case QTYPE_QSTRING: +@@ -3217,7 +3208,7 @@ void qmp_block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bitmaps, + Error **errp) + { +- do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); ++ block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); + } + + BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node, +diff --git a/include/block/block_int.h b/include/block/block_int.h +index cc18e8d..876a83d 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1341,4 +1341,16 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + Error **errp); + extern QemuOptsList bdrv_create_opts_simple; + ++BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, ++ const char *name, ++ BlockDriverState **pbs, ++ Error **errp); ++BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, ++ BlockDirtyBitmapMergeSourceList *bms, ++ HBitmap **backup, Error **errp); ++BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, ++ bool release, ++ BlockDriverState **bitmap_bs, ++ Error **errp); ++ + #endif /* BLOCK_INT_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch new file mode 100644 index 0000000..b2dd453 --- /dev/null +++ b/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch @@ -0,0 +1,107 @@ +From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:48 +0000 +Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot + abort + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-9-slp@redhat.com> +Patchwork-id: 93761 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot abort 
+Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +external_snapshot_abort() calls bdrv_set_backing_hd(), which +returns state->old_bs to the main AioContext, as it's intended to be +used when the BDS is going to be released. As that's not the case when +aborting an external snapshot, return it to the AioContext it was in +before the call. + +This issue can be triggered by issuing a transaction with two actions, +a proper blockdev-snapshot-sync and a bogus one, so the second will +trigger a transaction abort. This results in a crash with a stack +trace like this one: + + #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 + #2 0x00007fa10489cbc9 in __assert_fail_base + (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 + #3 0x00007fa1048aae96 in __GI___assert_fail + (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 + #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 + #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 + #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 + #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 + #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, 
errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 + #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 + #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 + #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 + #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 + #14 0x0000557223f2f366 in aio_bh_call (bh=0x557225b9dc60) at util/async.c:117 + #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 + #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 + #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 + #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 + #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 + #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 + #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #23 0x0000557223d13201 in main_loop () at vl.c:1828 + #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index d4ef6cd..4cd9a58 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) + if (state->new_bs) { + if (state->overlay_appended) { + AioContext *aio_context; ++ AioContext *tmp_context; ++ int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); +@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ + bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); ++ ++ /* ++ * The call to bdrv_set_backing_hd() above returns state->old_bs to ++ * the main AioContext. As we're still going to be using it, return ++ * it to the AioContext it was before. ++ */ ++ tmp_context = bdrv_get_aio_context(state->old_bs); ++ if (aio_context != tmp_context) { ++ aio_context_release(aio_context); ++ aio_context_acquire(tmp_context); ++ ++ ret = bdrv_try_set_aio_context(state->old_bs, ++ aio_context, NULL); ++ assert(ret == 0); ++ ++ aio_context_release(tmp_context); ++ aio_context_acquire(aio_context); ++ } ++ + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch b/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch new file mode 100644 index 0000000..d977922 --- /dev/null +++ b/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch @@ -0,0 +1,720 @@ +From 2afa718d59ef86879a9e34b4601a1f2658afa9ba Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:14 +0100 +Subject: [PATCH 09/26] blockdev: Split off basic bitmap operations for + qemu-img + +RH-Author: Eric Blake +Message-id: 
<20200602023420.2133649-7-eblake@redhat.com> +Patchwork-id: 97073 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 06/12] blockdev: Split off basic bitmap operations for qemu-img +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Upcoming patches want to add some basic bitmap manipulation abilities +to qemu-img. But blockdev.o is too heavyweight to link into qemu-img +(among other things, it would drag in block jobs and transaction +support - qemu-img does offline manipulation, where atomicity is less +important because there are no concurrent modifications to compete +with), so it's time to split off the bare bones of what we will need +into a new file block/monitor/bitmap-qmp-cmds.o. + +This is sufficient to expose 6 QMP commands for use by qemu-img (add, +remove, clear, enable, disable, merge), as well as move the three +helper functions touched in the previous patch. Regarding +MAINTAINERS, the new file is automatically part of block core, but +also makes sense as related to other dirty bitmap files. + +Signed-off-by: Eric Blake +Reviewed-by: Max Reitz +Message-Id: <20200513011648.166876-6-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit bb4e58c6137e80129b955789dd4b66c1504f20dc) + +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + Makefile.objs - comment context + block/monitor/Makefile.objs - context: a2dde2f2 not backported + blockdev.c - context +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. 
de Paula +--- + MAINTAINERS | 1 + + Makefile.objs | 3 +- + block/monitor/Makefile.objs | 1 + + block/monitor/bitmap-qmp-cmds.c | 321 ++++++++++++++++++++++++++++++++++++++++ + blockdev.c | 284 ----------------------------------- + 5 files changed, 324 insertions(+), 286 deletions(-) + create mode 100644 block/monitor/Makefile.objs + create mode 100644 block/monitor/bitmap-qmp-cmds.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 3a81ac9..49d5d44 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1875,6 +1875,7 @@ L: qemu-block@nongnu.org + S: Supported + F: include/qemu/hbitmap.h + F: include/block/dirty-bitmap.h ++F: block/monitor/bitmap-qmp-cmds.c + F: block/dirty-bitmap.c + F: block/qcow2-bitmap.c + F: migration/block-dirty-bitmap.c +diff --git a/Makefile.objs b/Makefile.objs +index 1a8f288..7404ef0 100644 +--- a/Makefile.objs ++++ b/Makefile.objs +@@ -13,9 +13,8 @@ authz-obj-y = authz/ + ####################################################################### + # block-obj-y is code used by both qemu system emulation and qemu-img + +-block-obj-y = nbd/ ++block-obj-y = block/ block/monitor/ nbd/ scsi/ + block-obj-y += block.o blockjob.o job.o +-block-obj-y += block/ scsi/ + block-obj-y += qemu-io-cmds.o + block-obj-$(CONFIG_REPLICATION) += replication.o + +diff --git a/block/monitor/Makefile.objs b/block/monitor/Makefile.objs +new file mode 100644 +index 0000000..f0c7642 +--- /dev/null ++++ b/block/monitor/Makefile.objs +@@ -0,0 +1 @@ ++block-obj-y += bitmap-qmp-cmds.o +diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c +new file mode 100644 +index 0000000..9f11dee +--- /dev/null ++++ b/block/monitor/bitmap-qmp-cmds.c +@@ -0,0 +1,321 @@ ++/* ++ * QEMU block dirty bitmap QMP commands ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or ++ * later. See the COPYING file in the top-level directory. 
++ * ++ * This file incorporates work covered by the following copyright and ++ * permission notice: ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "block/block_int.h" ++#include "qapi/qapi-commands-block.h" ++#include "qapi/error.h" ++ ++/** ++ * block_dirty_bitmap_lookup: ++ * Return a dirty bitmap (if present), after validating ++ * the node reference and bitmap names. ++ * ++ * @node: The name of the BDS node to search for bitmaps ++ * @name: The name of the bitmap to search for ++ * @pbs: Output pointer for BDS lookup, if desired. Can be NULL. ++ * @errp: Output pointer for error information. Can be NULL. ++ * ++ * @return: A bitmap object on success, or NULL on failure. 
++ */ ++BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, ++ const char *name, ++ BlockDriverState **pbs, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *bitmap; ++ ++ if (!node) { ++ error_setg(errp, "Node cannot be NULL"); ++ return NULL; ++ } ++ if (!name) { ++ error_setg(errp, "Bitmap name cannot be NULL"); ++ return NULL; ++ } ++ bs = bdrv_lookup_bs(node, node, NULL); ++ if (!bs) { ++ error_setg(errp, "Node '%s' not found", node); ++ return NULL; ++ } ++ ++ bitmap = bdrv_find_dirty_bitmap(bs, name); ++ if (!bitmap) { ++ error_setg(errp, "Dirty bitmap '%s' not found", name); ++ return NULL; ++ } ++ ++ if (pbs) { ++ *pbs = bs; ++ } ++ ++ return bitmap; ++} ++ ++void qmp_block_dirty_bitmap_add(const char *node, const char *name, ++ bool has_granularity, uint32_t granularity, ++ bool has_persistent, bool persistent, ++ bool has_disabled, bool disabled, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; ++ ++ if (!name || name[0] == '\0') { ++ error_setg(errp, "Bitmap name cannot be empty"); ++ return; ++ } ++ ++ bs = bdrv_lookup_bs(node, node, errp); ++ if (!bs) { ++ return; ++ } ++ ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ ++ if (has_granularity) { ++ if (granularity < 512 || !is_power_of_2(granularity)) { ++ error_setg(errp, "Granularity must be power of 2 " ++ "and at least 512"); ++ goto out; ++ } ++ } else { ++ /* Default to cluster size, if available: */ ++ granularity = bdrv_get_default_bitmap_granularity(bs); ++ } ++ ++ if (!has_persistent) { ++ persistent = false; ++ } ++ ++ if (!has_disabled) { ++ disabled = false; ++ } ++ ++ if (persistent && ++ !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) ++ { ++ goto out; ++ } ++ ++ bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); ++ if (bitmap == NULL) { ++ goto out; ++ } ++ ++ if (disabled) { ++ bdrv_disable_dirty_bitmap(bitmap); ++ } ++ ++ 
bdrv_dirty_bitmap_set_persistence(bitmap, persistent); ++ ++out: ++ aio_context_release(aio_context); ++} ++ ++BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, ++ bool release, ++ BlockDriverState **bitmap_bs, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; ++ ++ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); ++ if (!bitmap || !bs) { ++ return NULL; ++ } ++ ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ ++ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, ++ errp)) { ++ aio_context_release(aio_context); ++ return NULL; ++ } ++ ++ if (bdrv_dirty_bitmap_get_persistence(bitmap) && ++ bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) ++ { ++ aio_context_release(aio_context); ++ return NULL; ++ } ++ ++ if (release) { ++ bdrv_release_dirty_bitmap(bitmap); ++ } ++ ++ if (bitmap_bs) { ++ *bitmap_bs = bs; ++ } ++ ++ aio_context_release(aio_context); ++ return release ? NULL : bitmap; ++} ++ ++void qmp_block_dirty_bitmap_remove(const char *node, const char *name, ++ Error **errp) ++{ ++ block_dirty_bitmap_remove(node, name, true, NULL, errp); ++} ++ ++/** ++ * Completely clear a bitmap, for the purposes of synchronizing a bitmap ++ * immediately after a full backup operation. 
++ */ ++void qmp_block_dirty_bitmap_clear(const char *node, const char *name, ++ Error **errp) ++{ ++ BdrvDirtyBitmap *bitmap; ++ BlockDriverState *bs; ++ ++ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); ++ if (!bitmap || !bs) { ++ return; ++ } ++ ++ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { ++ return; ++ } ++ ++ bdrv_clear_dirty_bitmap(bitmap, NULL); ++} ++ ++void qmp_block_dirty_bitmap_enable(const char *node, const char *name, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *bitmap; ++ ++ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); ++ if (!bitmap) { ++ return; ++ } ++ ++ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { ++ return; ++ } ++ ++ bdrv_enable_dirty_bitmap(bitmap); ++} ++ ++void qmp_block_dirty_bitmap_disable(const char *node, const char *name, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *bitmap; ++ ++ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); ++ if (!bitmap) { ++ return; ++ } ++ ++ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { ++ return; ++ } ++ ++ bdrv_disable_dirty_bitmap(bitmap); ++} ++ ++BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, ++ BlockDirtyBitmapMergeSourceList *bms, ++ HBitmap **backup, Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvDirtyBitmap *dst, *src, *anon; ++ BlockDirtyBitmapMergeSourceList *lst; ++ Error *local_err = NULL; ++ ++ dst = block_dirty_bitmap_lookup(node, target, &bs, errp); ++ if (!dst) { ++ return NULL; ++ } ++ ++ anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), ++ NULL, errp); ++ if (!anon) { ++ return NULL; ++ } ++ ++ for (lst = bms; lst; lst = lst->next) { ++ switch (lst->value->type) { ++ const char *name, *node; ++ case QTYPE_QSTRING: ++ name = lst->value->u.local; ++ src = bdrv_find_dirty_bitmap(bs, name); ++ if (!src) { ++ error_setg(errp, "Dirty bitmap '%s' not found", name); ++ dst = NULL; ++ goto out; ++ } ++ 
break; ++ case QTYPE_QDICT: ++ node = lst->value->u.external.node; ++ name = lst->value->u.external.name; ++ src = block_dirty_bitmap_lookup(node, name, NULL, errp); ++ if (!src) { ++ dst = NULL; ++ goto out; ++ } ++ break; ++ default: ++ abort(); ++ } ++ ++ bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ dst = NULL; ++ goto out; ++ } ++ } ++ ++ /* Merge into dst; dst is unchanged on failure. */ ++ bdrv_merge_dirty_bitmap(dst, anon, backup, errp); ++ ++ out: ++ bdrv_release_dirty_bitmap(anon); ++ return dst; ++} ++ ++void qmp_block_dirty_bitmap_merge(const char *node, const char *target, ++ BlockDirtyBitmapMergeSourceList *bitmaps, ++ Error **errp) ++{ ++ block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); ++} +diff --git a/blockdev.c b/blockdev.c +index 3958058..5128c9b 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1250,53 +1250,6 @@ out_aio_context: + return NULL; + } + +-/** +- * block_dirty_bitmap_lookup: +- * Return a dirty bitmap (if present), after validating +- * the node reference and bitmap names. +- * +- * @node: The name of the BDS node to search for bitmaps +- * @name: The name of the bitmap to search for +- * @pbs: Output pointer for BDS lookup, if desired. Can be NULL. +- * @errp: Output pointer for error information. Can be NULL. +- * +- * @return: A bitmap object on success, or NULL on failure. 
+- */ +-BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, +- const char *name, +- BlockDriverState **pbs, +- Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *bitmap; +- +- if (!node) { +- error_setg(errp, "Node cannot be NULL"); +- return NULL; +- } +- if (!name) { +- error_setg(errp, "Bitmap name cannot be NULL"); +- return NULL; +- } +- bs = bdrv_lookup_bs(node, node, NULL); +- if (!bs) { +- error_setg(errp, "Node '%s' not found", node); +- return NULL; +- } +- +- bitmap = bdrv_find_dirty_bitmap(bs, name); +- if (!bitmap) { +- error_setg(errp, "Dirty bitmap '%s' not found", name); +- return NULL; +- } +- +- if (pbs) { +- *pbs = bs; +- } +- +- return bitmap; +-} +- + /* New and old BlockDriverState structs for atomic group operations */ + + typedef struct BlkActionState BlkActionState; +@@ -2974,243 +2927,6 @@ out: + aio_context_release(aio_context); + } + +-void qmp_block_dirty_bitmap_add(const char *node, const char *name, +- bool has_granularity, uint32_t granularity, +- bool has_persistent, bool persistent, +- bool has_disabled, bool disabled, +- Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; +- +- if (!name || name[0] == '\0') { +- error_setg(errp, "Bitmap name cannot be empty"); +- return; +- } +- +- bs = bdrv_lookup_bs(node, node, errp); +- if (!bs) { +- return; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (has_granularity) { +- if (granularity < 512 || !is_power_of_2(granularity)) { +- error_setg(errp, "Granularity must be power of 2 " +- "and at least 512"); +- goto out; +- } +- } else { +- /* Default to cluster size, if available: */ +- granularity = bdrv_get_default_bitmap_granularity(bs); +- } +- +- if (!has_persistent) { +- persistent = false; +- } +- +- if (!has_disabled) { +- disabled = false; +- } +- +- if (persistent && +- !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) +- { +- goto out; +- } +- +- bitmap 
= bdrv_create_dirty_bitmap(bs, granularity, name, errp); +- if (bitmap == NULL) { +- goto out; +- } +- +- if (disabled) { +- bdrv_disable_dirty_bitmap(bitmap); +- } +- +- bdrv_dirty_bitmap_set_persistence(bitmap, persistent); +- +-out: +- aio_context_release(aio_context); +-} +- +-BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, +- bool release, +- BlockDriverState **bitmap_bs, +- Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; +- +- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); +- if (!bitmap || !bs) { +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, +- errp)) { +- aio_context_release(aio_context); +- return NULL; +- } +- +- if (bdrv_dirty_bitmap_get_persistence(bitmap) && +- bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) +- { +- aio_context_release(aio_context); +- return NULL; +- } +- +- if (release) { +- bdrv_release_dirty_bitmap(bitmap); +- } +- +- if (bitmap_bs) { +- *bitmap_bs = bs; +- } +- +- aio_context_release(aio_context); +- return release ? NULL : bitmap; +-} +- +-void qmp_block_dirty_bitmap_remove(const char *node, const char *name, +- Error **errp) +-{ +- block_dirty_bitmap_remove(node, name, true, NULL, errp); +-} +- +-/** +- * Completely clear a bitmap, for the purposes of synchronizing a bitmap +- * immediately after a full backup operation. 
+- */ +-void qmp_block_dirty_bitmap_clear(const char *node, const char *name, +- Error **errp) +-{ +- BdrvDirtyBitmap *bitmap; +- BlockDriverState *bs; +- +- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); +- if (!bitmap || !bs) { +- return; +- } +- +- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { +- return; +- } +- +- bdrv_clear_dirty_bitmap(bitmap, NULL); +-} +- +-void qmp_block_dirty_bitmap_enable(const char *node, const char *name, +- Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *bitmap; +- +- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); +- if (!bitmap) { +- return; +- } +- +- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { +- return; +- } +- +- bdrv_enable_dirty_bitmap(bitmap); +-} +- +-void qmp_block_dirty_bitmap_disable(const char *node, const char *name, +- Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *bitmap; +- +- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); +- if (!bitmap) { +- return; +- } +- +- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { +- return; +- } +- +- bdrv_disable_dirty_bitmap(bitmap); +-} +- +-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, +- BlockDirtyBitmapMergeSourceList *bms, +- HBitmap **backup, Error **errp) +-{ +- BlockDriverState *bs; +- BdrvDirtyBitmap *dst, *src, *anon; +- BlockDirtyBitmapMergeSourceList *lst; +- Error *local_err = NULL; +- +- dst = block_dirty_bitmap_lookup(node, target, &bs, errp); +- if (!dst) { +- return NULL; +- } +- +- anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), +- NULL, errp); +- if (!anon) { +- return NULL; +- } +- +- for (lst = bms; lst; lst = lst->next) { +- switch (lst->value->type) { +- const char *name, *node; +- case QTYPE_QSTRING: +- name = lst->value->u.local; +- src = bdrv_find_dirty_bitmap(bs, name); +- if (!src) { +- error_setg(errp, "Dirty bitmap '%s' not found", name); +- dst = NULL; +- goto out; +- } +- 
break; +- case QTYPE_QDICT: +- node = lst->value->u.external.node; +- name = lst->value->u.external.name; +- src = block_dirty_bitmap_lookup(node, name, NULL, errp); +- if (!src) { +- dst = NULL; +- goto out; +- } +- break; +- default: +- abort(); +- } +- +- bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- dst = NULL; +- goto out; +- } +- } +- +- /* Merge into dst; dst is unchanged on failure. */ +- bdrv_merge_dirty_bitmap(dst, anon, backup, errp); +- +- out: +- bdrv_release_dirty_bitmap(anon); +- return dst; +-} +- +-void qmp_block_dirty_bitmap_merge(const char *node, const char *target, +- BlockDirtyBitmapMergeSourceList *bitmaps, +- Error **errp) +-{ +- block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); +-} +- + BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node, + const char *name, + Error **errp) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch new file mode 100644 index 0000000..399a06a --- /dev/null +++ b/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch @@ -0,0 +1,62 @@ +From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:41 +0000 +Subject: [PATCH 08/18] blockdev: fix coding style issues in + drive_backup_prepare +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-2-slp@redhat.com> +Patchwork-id: 93754 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Fix a 
couple of minor coding style issues in drive_backup_prepare. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 8e029e9..553e315 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char*) bs->drv->format_name; ++ NULL : (char *) bs->drv->format_name; + } + + /* Early check to avoid creating target */ +@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + flags = bs->open_flags | BDRV_O_RDWR; + +- /* See if we have a backing HD we can use to create our new image +- * on top of. */ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. 
++ */ + if (backup->sync == MIRROR_SYNC_MODE_TOP) { + source = backing_bs(bs); + if (!source) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch new file mode 100644 index 0000000..a94ee75 --- /dev/null +++ b/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch @@ -0,0 +1,204 @@ +From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:44 +0000 +Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context + requirements + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-5-slp@redhat.com> +Patchwork-id: 93758 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_try_set_aio_context() requires that the old context is held, and +the new context is not held. Fix all the occurrences where it's not +done this way. + +Suggested-by: Max Reitz +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 60 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 152a0f7..1dacbc2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; ++ AioContext *old_context; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(state->new_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; ++ AioContext *old_context; + QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; + bool set_backing_hd = false; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; +@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ bdrv_unref(target_bs); ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { +@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ + aio_context = bdrv_get_aio_context(bs); ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); + aio_context_acquire(aio_context); + state->bs = bs; + +@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; + int job_flags = JOB_DEFAULT; +- int ret; + + if (!backup->has_speed) { + backup->speed = 0; +@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + backup->compress = false; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- return NULL; +- } +- + if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || + (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { + /* done before desugaring 'incremental' to print the right message */ +@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + (arg->mode == NEW_IMAGE_MODE_EXISTING || + !bdrv_has_zero_init(target_bs))); + ++ ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- goto out; ++ aio_context_release(old_context); ++ return; + } + ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + Error *local_err = NULL; + bool zero_target; +@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); ++ aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch new file mode 100644 index 0000000..c426384 --- /dev/null +++ b/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch @@ -0,0 +1,144 @@ +From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:43 +0000 +Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-4-slp@redhat.com> +Patchwork-id: 93756 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a blockdev-backup from 
qmp_blockdev_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_blockdev_backup() and +blockdev_backup_prepare(). + +This change unifies both paths, merging do_blockdev_backup() and +blockdev_backup_prepare(), and changing qmp_blockdev_backup() to +create a transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_blockdev_backup() is executed inside a +drained section, as it happens when creating a blockdev-backup +transaction. This change is visible from the user's perspective, as +the job gets paused and immediately resumed before starting the actual +work. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 60 +++++++++++++----------------------------------------------- + 1 file changed, 13 insertions(+), 47 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 5e85fc0..152a0f7 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { + BlockJob *job; + } BlockdevBackupState; + +-static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp); +- + static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + { + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; +- BlockDriverState *bs, *target; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; + AioContext *aio_context; +- Error *local_err = NULL; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + +- target = bdrv_lookup_bs(backup->target,
backup->target, errp); +- if (!target) { ++ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); ++ if (!target_bs) { + return; + } + +@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + /* Paired with .clean() */ + bdrv_drained_begin(state->bs); + +- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } ++ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); + +-out: + aio_context_release(aio_context); + } + +@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) + return bdrv_get_xdbg_block_graph(errp); + } + +-BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp) ++void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) + { +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- AioContext *aio_context; +- BlockJob *job; +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); +- if (!target_bs) { +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- job = do_backup_common(qapi_BlockdevBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) +-{ +- BlockJob *job; +- job = do_blockdev_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, ++ .u.blockdev_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + /* Parameter check and block job starting for drive mirroring. 
+-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch new file mode 100644 index 0000000..9ec1975 --- /dev/null +++ b/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch @@ -0,0 +1,419 @@ +From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:42 +0000 +Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-3-slp@redhat.com> +Patchwork-id: 93755 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a drive-backup from qmp_drive_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_drive_backup() and +drive_backup_prepare(). + +This change unifies both paths, merging do_drive_backup() and +drive_backup_prepare(), and changing qmp_drive_backup() to create a +transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_drive_backup() is executed inside a drained +section, as it happens when creating a drive-backup transaction. This +change is visible from the user's perspective, as the job gets paused +and immediately resumed before starting the actual work. + +Also fix tests 141, 185 and 219 to cope with the extra +JOB_STATUS_CHANGE lines. + +Signed-off-by: Sergio Lopez +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L.
de Paula +--- + blockdev.c | 224 ++++++++++++++++++++------------------------- + tests/qemu-iotests/141.out | 2 + + tests/qemu-iotests/185.out | 2 + + tests/qemu-iotests/219 | 7 +- + tests/qemu-iotests/219.out | 8 ++ + 5 files changed, 117 insertions(+), 126 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 553e315..5e85fc0 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { + BlockJob *job; + } DriveBackupState; + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp); ++static BlockJob *do_backup_common(BackupCommon *backup, ++ BlockDriverState *bs, ++ BlockDriverState *target_bs, ++ AioContext *aio_context, ++ JobTxn *txn, Error **errp); + + static void drive_backup_prepare(BlkActionState *common, Error **errp) + { + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); +- BlockDriverState *bs; + DriveBackup *backup; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; ++ BlockDriverState *source = NULL; + AioContext *aio_context; ++ QDict *options; + Error *local_err = NULL; ++ int flags; ++ int64_t size; ++ bool set_backing_hd = false; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; + ++ if (!backup->has_mode) { ++ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ++ } ++ + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + ++ if (!bs->drv) { ++ error_setg(errp, "Device has no medium"); ++ return; ++ } ++ + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + +- state->bs = bs; ++ if (!backup->has_format) { ++ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
++ NULL : (char *) bs->drv->format_name; ++ } ++ ++ /* Early check to avoid creating target */ ++ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { ++ goto out; ++ } ++ ++ flags = bs->open_flags | BDRV_O_RDWR; ++ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ ++ if (backup->sync == MIRROR_SYNC_MODE_TOP) { ++ source = backing_bs(bs); ++ if (!source) { ++ backup->sync = MIRROR_SYNC_MODE_FULL; ++ } ++ } ++ if (backup->sync == MIRROR_SYNC_MODE_NONE) { ++ source = bs; ++ flags |= BDRV_O_NO_BACKING; ++ set_backing_hd = true; ++ } ++ ++ size = bdrv_getlength(bs); ++ if (size < 0) { ++ error_setg_errno(errp, -size, "bdrv_getlength failed"); ++ goto out; ++ } ++ ++ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { ++ assert(backup->format); ++ if (source) { ++ bdrv_refresh_filename(source); ++ bdrv_img_create(backup->target, backup->format, source->filename, ++ source->drv->format_name, NULL, ++ size, flags, false, &local_err); ++ } else { ++ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, ++ size, flags, false, &local_err); ++ } ++ } + +- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + ++ options = qdict_new(); ++ qdict_put_str(options, "discard", "unmap"); ++ qdict_put_str(options, "detect-zeroes", "unmap"); ++ if (backup->format) { ++ qdict_put_str(options, "driver", backup->format); ++ } ++ ++ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); ++ if (!target_bs) { ++ goto out; ++ } ++ ++ if (set_backing_hd) { ++ bdrv_set_backing_hd(target_bs, source, &local_err); ++ if (local_err) { ++ goto unref; ++ } ++ } ++ ++ state->bs = bs; ++ ++ state->job = do_backup_common(qapi_DriveBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); ++ ++unref: ++ bdrv_unref(target_bs); + out: + aio_context_release(aio_context); + } +@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, + return job; + } + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp) +-{ +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- BlockDriverState *source = NULL; +- BlockJob *job = NULL; +- AioContext *aio_context; +- QDict *options; +- Error *local_err = NULL; +- int flags; +- int64_t size; +- bool set_backing_hd = false; +- +- if (!backup->has_mode) { +- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +- } +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- if (!bs->drv) { +- error_setg(errp, "Device has no medium"); +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (!backup->has_format) { +- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char *) bs->drv->format_name; +- } +- +- /* Early check to avoid creating target */ +- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +- goto out; +- } +- +- flags = bs->open_flags | BDRV_O_RDWR; +- +- /* +- * See if we have a backing HD we can use to create our new image +- * on top of. 
+- */ +- if (backup->sync == MIRROR_SYNC_MODE_TOP) { +- source = backing_bs(bs); +- if (!source) { +- backup->sync = MIRROR_SYNC_MODE_FULL; +- } +- } +- if (backup->sync == MIRROR_SYNC_MODE_NONE) { +- source = bs; +- flags |= BDRV_O_NO_BACKING; +- set_backing_hd = true; +- } +- +- size = bdrv_getlength(bs); +- if (size < 0) { +- error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; +- } +- +- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +- assert(backup->format); +- if (source) { +- bdrv_refresh_filename(source); +- bdrv_img_create(backup->target, backup->format, source->filename, +- source->drv->format_name, NULL, +- size, flags, false, &local_err); +- } else { +- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, +- size, flags, false, &local_err); +- } +- } +- +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } +- +- options = qdict_new(); +- qdict_put_str(options, "discard", "unmap"); +- qdict_put_str(options, "detect-zeroes", "unmap"); +- if (backup->format) { +- qdict_put_str(options, "driver", backup->format); +- } +- +- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- if (!target_bs) { +- goto out; +- } +- +- if (set_backing_hd) { +- bdrv_set_backing_hd(target_bs, source, &local_err); +- if (local_err) { +- goto unref; +- } +- } +- +- job = do_backup_common(qapi_DriveBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +-unref: +- bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_drive_backup(DriveBackup *arg, Error **errp) ++void qmp_drive_backup(DriveBackup *backup, Error **errp) + { +- +- BlockJob *job; +- job = do_drive_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, ++ .u.drive_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
+diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 3645675..263b680 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. + Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} + {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} + {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index 8379ac5..9a3b657 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l + Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } + {"return": {}} +diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 +index e0c5166..655f54d 100755 +--- a/tests/qemu-iotests/219 ++++ b/tests/qemu-iotests/219 +@@ -63,7 +63,7 @@ def test_pause_resume(vm): + # logged immediately + iotests.log(vm.qmp('query-jobs')) + +-def test_job_lifecycle(vm, job, job_args, has_ready=False): ++def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): + global img_size + + iotests.log('') +@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): + iotests.log('Waiting for PENDING state...') + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ if is_mirror: ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + + if not job_args.get('auto-finalize', True): + # PENDING state: +@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ + + for auto_finalize in [True, False]: + for auto_dismiss in [True, False]: +- test_job_lifecycle(vm, 'drive-backup', job_args={ ++ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ + 'device': 'drive0-node', + 'target': copy_path, + 'sync': 'full', +diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out +index 8ebd3fe..0ea5d0b 100644 +--- a/tests/qemu-iotests/219.out ++++ b/tests/qemu-iotests/219.out +@@ -135,6 +135,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -186,6 +188,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -245,6 +249,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +@@ -304,6 +310,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch new file mode 100644 index 0000000..5d21bf8 --- /dev/null +++ b/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch @@ -0,0 +1,137 @@ +From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:38 +0100 +Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-4-dgilbert@redhat.com> +Patchwork-id: 93459 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Paolo Bonzini + +Since we are actually testing for the newer capng library, rename the +symbol to match. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Paolo Bonzini +(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) +Signed-off-by: Miroslav Rezanina +--- + configure | 2 +- + qemu-bridge-helper.c | 6 +++--- + scsi/qemu-pr-helper.c | 12 ++++++------ + 3 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/configure b/configure +index 16564f8..7831618 100755 +--- a/configure ++++ b/configure +@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then + echo "CONFIG_L2TPV3=y" >> $config_host_mak + fi + if test "$cap_ng" = "yes" ; then +- echo "CONFIG_LIBCAP=y" >> $config_host_mak ++ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak + fi + echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak + for drv in $audio_drv_list; do +diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c +index 3d50ec0..88b2674 100644 +--- a/qemu-bridge-helper.c ++++ b/qemu-bridge-helper.c +@@ -43,7 +43,7 @@ + + #include "net/tap-linux.h" + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + +@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) + return sendmsg(c, &msg, 0); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -246,7 +246,7 @@ int main(int argc, char **argv) + int access_allowed, 
access_denied; + int ret = EXIT_SUCCESS; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + /* if we're run from an suid binary, immediately drop privileges preserving + * cap_net_admin */ + if (geteuid() == 0 && getuid() != geteuid()) { +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index debb18f..0659cee 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -24,7 +24,7 @@ + #include + #include + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + #include +@@ -70,7 +70,7 @@ static int num_active_sockets = 1; + static int noisy; + static int verbose; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int uid = -1; + static int gid = -1; + #endif +@@ -97,7 +97,7 @@ static void usage(const char *name) + " (default '%s')\n" + " -T, --trace [[enable=]][,events=][,file=]\n" + " specify tracing options\n" +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + " -u, --user=USER user to drop privileges to\n" + " -g, --group=GROUP group to drop privileges to\n" + #endif +@@ -827,7 +827,7 @@ static void close_server_socket(void) + num_active_sockets--; + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -920,7 +920,7 @@ int main(int argc, char **argv) + pidfile = g_strdup(optarg); + pidfile_specified = true; + break; +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + case 'u': { + unsigned long res; + struct passwd *userinfo = getpwnam(optarg); +@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) + exit(EXIT_FAILURE); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + if (drop_privileges() < 0) { + error_report("Failed to drop privileges: %s", strerror(errno)); + exit(EXIT_FAILURE); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch new file mode 100644 index 0000000..5b1b170 --- /dev/null +++ 
b/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch @@ -0,0 +1,2463 @@ +From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 18 Mar 2020 18:10:40 +0000 +Subject: [PATCH 2/2] build-sys: do not make qemu-ga link with pixman +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> +Patchwork-id: 94381 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman +Bugzilla: 1811670 +RH-Acked-by: Markus Armbruster +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange + +Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add +vhost-user-gpu"), qemu-ga is linking with pixman. + +This is because the Make-based build-system use a global namespace for +variables, and we rely on "main.o-libs" for different linking targets. + +Note: this kind of variable clashing is hard to fix or prevent +currently. meson should help, as declarations have a linear +dependency and doesn't rely so much on variables and clever tricks. + +Note2: we have a lot of main.c (or other duplicated names!) in +tree. Imho, it would be annoying and a bad workaroud to rename all +those to avoid conflicts like I did here. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 + +Signed-off-by: Marc-André Lureau +Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> +Signed-off-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 + +(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/vhost-user-gpu/Makefile.objs | 6 +- + contrib/vhost-user-gpu/main.c | 1191 ------------------------------- + contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ + 3 files changed, 1194 insertions(+), 1194 deletions(-) + delete mode 100644 contrib/vhost-user-gpu/main.c + create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c + +diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs +index 6170c91..0929609 100644 +--- a/contrib/vhost-user-gpu/Makefile.objs ++++ b/contrib/vhost-user-gpu/Makefile.objs +@@ -1,7 +1,7 @@ +-vhost-user-gpu-obj-y = main.o virgl.o vugbm.o ++vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o + +-main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) +-main.o-libs := $(PIXMAN_LIBS) ++vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) ++vhost-user-gpu.o-libs := $(PIXMAN_LIBS) + + virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) + virgl.o-libs := $(VIRGL_LIBS) +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +deleted file mode 100644 +index b45d201..0000000 +--- a/contrib/vhost-user-gpu/main.c ++++ /dev/null +@@ -1,1191 +0,0 @@ +-/* +- * Virtio vhost-user GPU Device +- * +- * Copyright Red Hat, Inc. 2013-2018 +- * +- * Authors: +- * Dave Airlie +- * Gerd Hoffmann +- * Marc-André Lureau +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- */ +-#include "qemu/osdep.h" +-#include "qemu/drm.h" +-#include "qapi/error.h" +-#include "qemu/sockets.h" +- +-#include +-#include +- +-#include "vugpu.h" +-#include "hw/virtio/virtio-gpu-bswap.h" +-#include "hw/virtio/virtio-gpu-pixman.h" +-#include "virgl.h" +-#include "vugbm.h" +- +-enum { +- VHOST_USER_GPU_MAX_QUEUES = 2, +-}; +- +-struct virtio_gpu_simple_resource { +- uint32_t resource_id; +- uint32_t width; +- uint32_t height; +- uint32_t format; +- struct iovec *iov; +- unsigned int iov_cnt; +- uint32_t scanout_bitmask; +- pixman_image_t *image; +- struct vugbm_buffer buffer; +- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; +-}; +- +-static gboolean opt_print_caps; +-static int opt_fdnum = -1; +-static char *opt_socket_path; +-static char *opt_render_node; +-static gboolean opt_virgl; +- +-static void vg_handle_ctrl(VuDev *dev, int qidx); +- +-static const char * +-vg_cmd_to_string(int cmd) +-{ +-#define CMD(cmd) [cmd] = #cmd +- static const char *vg_cmd_str[] = { +- CMD(VIRTIO_GPU_UNDEFINED), +- +- /* 2d commands */ +- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), +- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), +- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), +- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET), +- +- /* 3d commands */ +- CMD(VIRTIO_GPU_CMD_CTX_CREATE), +- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), +- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), +- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), +- +- /* cursor commands */ +- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), +- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), +- }; +-#undef REQ +- +- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { +- return 
vg_cmd_str[cmd]; +- } else { +- return "unknown"; +- } +-} +- +-static int +-vg_sock_fd_read(int sock, void *buf, ssize_t buflen) +-{ +- int ret; +- +- do { +- ret = read(sock, buf, buflen); +- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-static void +-vg_sock_fd_close(VuGpu *g) +-{ +- if (g->sock_fd >= 0) { +- close(g->sock_fd); +- g->sock_fd = -1; +- } +-} +- +-static gboolean +-source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) +-{ +- VuGpu *g = user_data; +- +- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { +- return G_SOURCE_CONTINUE; +- } +- +- /* resume */ +- g->wait_ok = 0; +- vg_handle_ctrl(&g->dev.parent, 0); +- +- return G_SOURCE_REMOVE; +-} +- +-void +-vg_wait_ok(VuGpu *g) +-{ +- assert(g->wait_ok == 0); +- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, +- source_wait_cb, g); +-} +- +-static int +-vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) +-{ +- ssize_t ret; +- struct iovec iov = { +- .iov_base = (void *)buf, +- .iov_len = buflen, +- }; +- struct msghdr msg = { +- .msg_iov = &iov, +- .msg_iovlen = 1, +- }; +- union { +- struct cmsghdr cmsghdr; +- char control[CMSG_SPACE(sizeof(int))]; +- } cmsgu; +- struct cmsghdr *cmsg; +- +- if (fd != -1) { +- msg.msg_control = cmsgu.control; +- msg.msg_controllen = sizeof(cmsgu.control); +- +- cmsg = CMSG_FIRSTHDR(&msg); +- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); +- cmsg->cmsg_level = SOL_SOCKET; +- cmsg->cmsg_type = SCM_RIGHTS; +- +- *((int *)CMSG_DATA(cmsg)) = fd; +- } +- +- do { +- ret = sendmsg(sock, &msg, 0); +- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-void +-vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) +-{ +- if (vg_sock_fd_write(vg->sock_fd, msg, +- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { +- vg_sock_fd_close(vg); +- } +-} +- +-bool +-vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, +- gpointer payload) +-{ +- uint32_t req, flags, size; +- +- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || +- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || +- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { +- goto err; +- } +- +- g_return_val_if_fail(req == expect_req, false); +- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); +- g_return_val_if_fail(size == expect_size, false); +- +- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { +- goto err; +- } +- +- return true; +- +-err: +- vg_sock_fd_close(g); +- return false; +-} +- +-static struct virtio_gpu_simple_resource * +-virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) +-{ +- struct virtio_gpu_simple_resource *res; +- +- QTAILQ_FOREACH(res, &g->reslist, next) { +- if (res->resource_id == resource_id) { +- return res; +- } +- } +- return NULL; +-} +- +-void +-vg_ctrl_response(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- struct virtio_gpu_ctrl_hdr *resp, +- size_t resp_len) +-{ +- size_t s; +- +- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { +- resp->flags |= VIRTIO_GPU_FLAG_FENCE; +- resp->fence_id = cmd->cmd_hdr.fence_id; +- resp->ctx_id = cmd->cmd_hdr.ctx_id; +- } +- virtio_gpu_ctrl_hdr_bswap(resp); +- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); +- if (s != resp_len) { +- g_critical("%s: response size incorrect %zu vs %zu", +- __func__, s, resp_len); +- } +- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); +- vu_queue_notify(&g->dev.parent, cmd->vq); +- cmd->finished = true; +-} +- +-void +-vg_ctrl_response_nodata(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- enum virtio_gpu_ctrl_type type) +-{ +- struct virtio_gpu_ctrl_hdr resp = { +- .type = type, +- }; +- +- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); +-} +- +-void +-vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
+- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, +- .size = 0, +- }; +- +- assert(vg->wait_ok == 0); +- +- vg_send_msg(vg, &msg, -1); +- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { +- return; +- } +- +- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); +-} +- +-static void +-vg_resource_create_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- pixman_format_code_t pformat; +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_create_2d c2d; +- +- VUGPU_FILL_CMD(c2d); +- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); +- +- if (c2d.resource_id == 0) { +- g_critical("%s: resource id 0 is not allowed", __func__); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = virtio_gpu_find_resource(g, c2d.resource_id); +- if (res) { +- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = g_new0(struct virtio_gpu_simple_resource, 1); +- res->width = c2d.width; +- res->height = c2d.height; +- res->format = c2d.format; +- res->resource_id = c2d.resource_id; +- +- pformat = virtio_gpu_get_pixman_format(c2d.format); +- if (!pformat) { +- g_critical("%s: host couldn't handle guest format %d", +- __func__, c2d.format); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); +- res->image = pixman_image_create_bits(pformat, +- c2d.width, +- c2d.height, +- (uint32_t *)res->buffer.mmap, +- res->buffer.stride); +- if (!res->image) { +- g_critical("%s: resource creation failed %d %d %d", +- __func__, c2d.resource_id, c2d.width, c2d.height); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; +- return; +- } +- +- QTAILQ_INSERT_HEAD(&g->reslist, res, next); +-} +- +-static void +-vg_disable_scanout(VuGpu *g, int scanout_id) +-{ +- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; +- struct virtio_gpu_simple_resource *res; +- +- if (scanout->resource_id == 0) { +- return; +- } +- +- res = virtio_gpu_find_resource(g, scanout->resource_id); +- if (res) { +- res->scanout_bitmask &= ~(1 << scanout_id); +- } +- +- scanout->width = 0; +- scanout->height = 0; +- +- if (g->sock_fd >= 0) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout.scanout_id = scanout_id, +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_destroy(VuGpu *g, +- struct virtio_gpu_simple_resource *res) +-{ +- int i; +- +- if (res->scanout_bitmask) { +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- if (res->scanout_bitmask & (1 << i)) { +- vg_disable_scanout(g, i); +- } +- } +- } +- +- vugbm_buffer_destroy(&res->buffer); +- pixman_image_unref(res->image); +- QTAILQ_REMOVE(&g->reslist, res, next); +- g_free(res); +-} +- +-static void +-vg_resource_unref(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_unref unref; +- +- VUGPU_FILL_CMD(unref); +- virtio_gpu_bswap_32(&unref, sizeof(unref)); +- +- res = virtio_gpu_find_resource(g, unref.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, unref.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- vg_resource_destroy(g, res); +-} +- +-int +-vg_create_mapping_iov(VuGpu *g, +- struct virtio_gpu_resource_attach_backing *ab, +- struct virtio_gpu_ctrl_command *cmd, +- struct iovec **iov) +-{ +- struct virtio_gpu_mem_entry *ents; +- size_t esize, s; +- int i; +- +- if (ab->nr_entries > 16384) { +- g_critical("%s: nr_entries is too big (%d > 16384)", +- __func__, ab->nr_entries); +- return -1; +- } +- +- esize = sizeof(*ents) * ab->nr_entries; +- ents = g_malloc(esize); +- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- sizeof(*ab), ents, esize); +- if (s 
!= esize) { +- g_critical("%s: command data size incorrect %zu vs %zu", +- __func__, s, esize); +- g_free(ents); +- return -1; +- } +- +- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); +- for (i = 0; i < ab->nr_entries; i++) { +- uint64_t len = ents[i].length; +- (*iov)[i].iov_len = ents[i].length; +- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); +- if (!(*iov)[i].iov_base || len != ents[i].length) { +- g_critical("%s: resource %d element %d", +- __func__, ab->resource_id, i); +- g_free(*iov); +- g_free(ents); +- *iov = NULL; +- return -1; +- } +- } +- g_free(ents); +- return 0; +-} +- +-static void +-vg_resource_attach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_attach_backing ab; +- int ret; +- +- VUGPU_FILL_CMD(ab); +- virtio_gpu_bswap_32(&ab, sizeof(ab)); +- +- res = virtio_gpu_find_resource(g, ab.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ab.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); +- if (ret != 0) { +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; +- } +- +- res->iov_cnt = ab.nr_entries; +-} +- +-static void +-vg_resource_detach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_detach_backing detach; +- +- VUGPU_FILL_CMD(detach); +- virtio_gpu_bswap_32(&detach, sizeof(detach)); +- +- res = virtio_gpu_find_resource(g, detach.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, detach.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- g_free(res->iov); +- res->iov = NULL; +- res->iov_cnt = 0; +-} +- +-static void +-vg_transfer_to_host_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct 
virtio_gpu_simple_resource *res; +- int h; +- uint32_t src_offset, dst_offset, stride; +- int bpp; +- pixman_format_code_t format; +- struct virtio_gpu_transfer_to_host_2d t2d; +- +- VUGPU_FILL_CMD(t2d); +- virtio_gpu_t2d_bswap(&t2d); +- +- res = virtio_gpu_find_resource(g, t2d.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, t2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (t2d.r.x > res->width || +- t2d.r.y > res->height || +- t2d.r.width > res->width || +- t2d.r.height > res->height || +- t2d.r.x + t2d.r.width > res->width || +- t2d.r.y + t2d.r.height > res->height) { +- g_critical("%s: transfer bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d", +- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, +- t2d.r.width, t2d.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- format = pixman_image_get_format(res->image); +- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; +- stride = pixman_image_get_stride(res->image); +- +- if (t2d.offset || t2d.r.x || t2d.r.y || +- t2d.r.width != pixman_image_get_width(res->image)) { +- void *img_data = pixman_image_get_data(res->image); +- for (h = 0; h < t2d.r.height; h++) { +- src_offset = t2d.offset + stride * h; +- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); +- +- iov_to_buf(res->iov, res->iov_cnt, src_offset, +- img_data +- + dst_offset, t2d.r.width * bpp); +- } +- } else { +- iov_to_buf(res->iov, res->iov_cnt, 0, +- pixman_image_get_data(res->image), +- pixman_image_get_stride(res->image) +- * pixman_image_get_height(res->image)); +- } +-} +- +-static void +-vg_set_scanout(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res, *ores; +- struct virtio_gpu_scanout *scanout; +- struct virtio_gpu_set_scanout ss; +- int fd; +- +- VUGPU_FILL_CMD(ss); +- virtio_gpu_bswap_32(&ss, sizeof(ss)); +- +- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { +- g_critical("%s: illegal scanout id specified %d", +- __func__, ss.scanout_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; +- return; +- } +- +- if (ss.resource_id == 0) { +- vg_disable_scanout(g, ss.scanout_id); +- return; +- } +- +- /* create a surface for this scanout */ +- res = virtio_gpu_find_resource(g, ss.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ss.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (ss.r.x > res->width || +- ss.r.y > res->height || +- ss.r.width > res->width || +- ss.r.height > res->height || +- ss.r.x + ss.r.width > res->width || +- ss.r.y + ss.r.height > res->height) { +- g_critical("%s: illegal scanout %d bounds for" +- " resource %d, (%d,%d)+%d,%d vs %d %d", +- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, +- ss.r.width, ss.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- scanout = &g->scanout[ss.scanout_id]; +- +- ores = virtio_gpu_find_resource(g, scanout->resource_id); +- if (ores) { +- ores->scanout_bitmask &= ~(1 << ss.scanout_id); +- } +- +- res->scanout_bitmask |= (1 << ss.scanout_id); +- scanout->resource_id = ss.resource_id; +- scanout->x = ss.r.x; +- scanout->y = ss.r.y; +- scanout->width = ss.r.width; +- scanout->height = ss.r.height; +- +- struct vugbm_buffer *buffer = &res->buffer; +- +- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_DMABUF_SCANOUT, +- .size = sizeof(VhostUserGpuDMABUFScanout), +- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { +- .scanout_id = ss.scanout_id, +- .x = ss.r.x, +- .y = ss.r.y, +- .width = ss.r.width, +- .height = ss.r.height, +- .fd_width = buffer->width, +- .fd_height = buffer->height, +- .fd_stride = buffer->stride, +- .fd_drm_fourcc = buffer->format +- } +- }; +- +- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { +- vg_send_msg(g, &msg, fd); +- close(fd); +- } +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout = (VhostUserGpuScanout) { +- .scanout_id = ss.scanout_id, +- .width = scanout->width, +- .height = scanout->height +- } +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_flush(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_flush rf; +- pixman_region16_t flush_region; +- int i; +- +- VUGPU_FILL_CMD(rf); +- virtio_gpu_bswap_32(&rf, sizeof(rf)); +- +- res = virtio_gpu_find_resource(g, rf.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d\n", +- __func__, rf.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (rf.r.x > res->width || +- rf.r.y > res->height || +- rf.r.width > res->width || +- rf.r.height > res->height || +- rf.r.x + rf.r.width > res->width || +- rf.r.y + rf.r.height > res->height) { +- g_critical("%s: flush bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d\n", +- __func__, rf.resource_id, rf.r.x, rf.r.y, +- rf.r.width, rf.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- pixman_region_init_rect(&flush_region, +- rf.r.x, rf.r.y, rf.r.width, rf.r.height); +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- struct virtio_gpu_scanout *scanout; +- pixman_region16_t region, finalregion; +- pixman_box16_t *extents; +- +- if (!(res->scanout_bitmask & (1 << i))) { +- continue; +- } +- scanout = &g->scanout[i]; +- +- pixman_region_init(&finalregion); +- pixman_region_init_rect(®ion, scanout->x, scanout->y, +- scanout->width, scanout->height); +- +- pixman_region_intersect(&finalregion, &flush_region, ®ion); +- +- extents = pixman_region_extents(&finalregion); +- size_t width = 
extents->x2 - extents->x1; +- size_t height = extents->y2 - extents->y1; +- +- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { +- VhostUserGpuMsg vmsg = { +- .request = VHOST_USER_GPU_DMABUF_UPDATE, +- .size = sizeof(VhostUserGpuUpdate), +- .payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- } +- }; +- vg_send_msg(g, &vmsg, -1); +- vg_wait_ok(g); +- } else { +- size_t bpp = +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; +- size_t size = width * height * bpp; +- +- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + +- sizeof(VhostUserGpuUpdate) + size); +- VhostUserGpuMsg *msg = p; +- msg->request = VHOST_USER_GPU_UPDATE; +- msg->size = sizeof(VhostUserGpuUpdate) + size; +- msg->payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- }; +- pixman_image_t *i = +- pixman_image_create_bits(pixman_image_get_format(res->image), +- msg->payload.update.width, +- msg->payload.update.height, +- p + offsetof(VhostUserGpuMsg, +- payload.update.data), +- width * bpp); +- pixman_image_composite(PIXMAN_OP_SRC, +- res->image, NULL, i, +- extents->x1, extents->y1, +- 0, 0, 0, 0, +- width, height); +- pixman_image_unref(i); +- vg_send_msg(g, msg, -1); +- g_free(msg); +- } +- pixman_region_fini(®ion); +- pixman_region_fini(&finalregion); +- } +- pixman_region_fini(&flush_region); +-} +- +-static void +-vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- switch (cmd->cmd_hdr.type) { +- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: +- vg_get_display_info(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: +- vg_resource_create_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_UNREF: +- vg_resource_unref(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: +- vg_resource_flush(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: +- 
vg_transfer_to_host_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_SET_SCANOUT: +- vg_set_scanout(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: +- vg_resource_attach_backing(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: +- vg_resource_detach_backing(vg, cmd); +- break; +- /* case VIRTIO_GPU_CMD_GET_EDID: */ +- /* break */ +- default: +- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- break; +- } +- if (!cmd->finished) { +- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : +- VIRTIO_GPU_RESP_OK_NODATA); +- } +-} +- +-static void +-vg_handle_ctrl(VuDev *dev, int qidx) +-{ +- VuGpu *vg = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- struct virtio_gpu_ctrl_command *cmd = NULL; +- size_t len; +- +- for (;;) { +- if (vg->wait_ok != 0) { +- return; +- } +- +- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); +- if (!cmd) { +- break; +- } +- cmd->vq = vq; +- cmd->error = 0; +- cmd->finished = false; +- +- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); +- if (len != sizeof(cmd->cmd_hdr)) { +- g_warning("%s: command size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cmd->cmd_hdr)); +- } +- +- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); +- g_debug("%d %s\n", cmd->cmd_hdr.type, +- vg_cmd_to_string(cmd->cmd_hdr.type)); +- +- if (vg->virgl) { +- vg_virgl_process_cmd(vg, cmd); +- } else { +- vg_process_cmd(vg, cmd); +- } +- +- if (!cmd->finished) { +- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); +- vg->inflight++; +- } else { +- g_free(cmd); +- } +- } +-} +- +-static void +-update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) +-{ +- struct virtio_gpu_simple_resource *res; +- +- res = virtio_gpu_find_resource(g, resource_id); +- g_return_if_fail(res != NULL); +- g_return_if_fail(pixman_image_get_width(res->image) == 64); +- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); +- g_return_if_fail( +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); +- +- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); +-} +- +-static void +-vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) +-{ +- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; +- +- g_debug("%s move:%d\n", G_STRFUNC, move); +- +- if (move) { +- VhostUserGpuMsg msg = { +- .request = cursor->resource_id ? +- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, +- .size = sizeof(VhostUserGpuCursorPos), +- .payload.cursor_pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- } +- }; +- vg_send_msg(g, &msg, -1); +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_CURSOR_UPDATE, +- .size = sizeof(VhostUserGpuCursorUpdate), +- .payload.cursor_update = { +- .pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- }, +- .hot_x = cursor->hot_x, +- .hot_y = cursor->hot_y, +- } +- }; +- if (g->virgl) { +- vg_virgl_update_cursor_data(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } else { +- update_cursor_data_simple(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_handle_cursor(VuDev *dev, int qidx) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- VuVirtqElement *elem; +- size_t len; +- struct virtio_gpu_update_cursor cursor; +- +- for (;;) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); +- if (!elem) { +- break; +- } +- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); +- +- len = iov_to_buf(elem->out_sg, elem->out_num, +- 0, &cursor, sizeof(cursor)); +- if (len != sizeof(cursor)) { +- g_warning("%s: cursor size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cursor)); +- } else { +- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); +- vg_process_cursor_cmd(g, &cursor); +- } +- vu_queue_push(dev, vq, elem, 0); +- vu_queue_notify(dev, vq); +- g_free(elem); +- } +-} +- +-static void +-vg_panic(VuDev *dev, const char *msg) +-{ +- g_critical("%s\n", msg); +- exit(1); +-} +- +-static void +-vg_queue_set_started(VuDev *dev, int qidx, bool started) +-{ +- VuVirtq *vq = vu_get_queue(dev, qidx); +- +- g_debug("queue started %d:%d\n", qidx, started); +- +- switch (qidx) { +- case 0: +- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); +- break; +- case 1: +- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); +- break; +- default: +- break; +- } +-} +- +-static void +-set_gpu_protocol_features(VuGpu *g) +-{ +- uint64_t u64; +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES +- }; +- +- assert(g->wait_ok == 0); +- vg_send_msg(g, &msg, -1); +- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { +- return; +- } +- +- msg = (VhostUserGpuMsg) { +- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, +- .size = sizeof(uint64_t), +- .payload.u64 = 0 +- }; +- vg_send_msg(g, &msg, -1); +-} +- +-static int +-vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- switch (msg->request) { +- case VHOST_USER_GPU_SET_SOCKET: { +- g_return_val_if_fail(msg->fd_num == 1, 1); +- g_return_val_if_fail(g->sock_fd == -1, 1); +- g->sock_fd = msg->fds[0]; +- set_gpu_protocol_features(g); +- return 1; +- } +- default: +- return 0; +- } +- +- return 0; +-} +- +-static uint64_t +-vg_get_features(VuDev *dev) +-{ +- uint64_t features = 0; +- +- if (opt_virgl) { +- features |= 1 << VIRTIO_GPU_F_VIRGL; +- } +- +- return features; +-} +- +-static void +-vg_set_features(VuDev *dev, uint64_t features) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); +- +- if (virgl && !g->virgl_inited) { +- if (!vg_virgl_init(g)) 
{ +- vg_panic(dev, "Failed to initialize virgl"); +- } +- g->virgl_inited = true; +- } +- +- g->virgl = virgl; +-} +- +-static int +-vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); +- +- if (opt_virgl) { +- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); +- } +- +- memcpy(config, &g->virtio_config, len); +- +- return 0; +-} +- +-static int +-vg_set_config(VuDev *dev, const uint8_t *data, +- uint32_t offset, uint32_t size, +- uint32_t flags) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; +- +- if (config->events_clear) { +- g->virtio_config.events_read &= ~config->events_clear; +- } +- +- return 0; +-} +- +-static const VuDevIface vuiface = { +- .set_features = vg_set_features, +- .get_features = vg_get_features, +- .queue_set_started = vg_queue_set_started, +- .process_msg = vg_process_msg, +- .get_config = vg_get_config, +- .set_config = vg_set_config, +-}; +- +-static void +-vg_destroy(VuGpu *g) +-{ +- struct virtio_gpu_simple_resource *res, *tmp; +- +- vug_deinit(&g->dev); +- +- vg_sock_fd_close(g); +- +- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { +- vg_resource_destroy(g, res); +- } +- +- vugbm_device_destroy(&g->gdev); +-} +- +-static GOptionEntry entries[] = { +- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, +- "Print capabilities", NULL }, +- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, +- "Use inherited fd socket", "FDNUM" }, +- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, +- "Use UNIX socket path", "PATH" }, +- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, +- "Specify DRM render node", "PATH" }, +- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, +- "Turn virgl rendering on", NULL }, +- { NULL, } +-}; +- +-int +-main(int argc, char *argv[]) +-{ +- 
GOptionContext *context; +- GError *error = NULL; +- GMainLoop *loop = NULL; +- int fd; +- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; +- +- QTAILQ_INIT(&g.reslist); +- QTAILQ_INIT(&g.fenceq); +- +- context = g_option_context_new("QEMU vhost-user-gpu"); +- g_option_context_add_main_entries(context, entries, NULL); +- if (!g_option_context_parse(context, &argc, &argv, &error)) { +- g_printerr("Option parsing failed: %s\n", error->message); +- exit(EXIT_FAILURE); +- } +- g_option_context_free(context); +- +- if (opt_print_caps) { +- g_print("{\n"); +- g_print(" \"type\": \"gpu\",\n"); +- g_print(" \"features\": [\n"); +- g_print(" \"render-node\",\n"); +- g_print(" \"virgl\"\n"); +- g_print(" ]\n"); +- g_print("}\n"); +- exit(EXIT_SUCCESS); +- } +- +- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); +- if (opt_render_node && g.drm_rnode_fd == -1) { +- g_printerr("Failed to open DRM rendernode.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (g.drm_rnode_fd >= 0) { +- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { +- g_warning("Failed to init DRM device, using fallback path"); +- } +- } +- +- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { +- g_printerr("Please specify either --fd or --socket-path\n"); +- exit(EXIT_FAILURE); +- } +- +- if (opt_socket_path) { +- int lsock = unix_listen(opt_socket_path, &error_fatal); +- if (lsock < 0) { +- g_printerr("Failed to listen on %s.\n", opt_socket_path); +- exit(EXIT_FAILURE); +- } +- fd = accept(lsock, NULL, NULL); +- close(lsock); +- } else { +- fd = opt_fdnum; +- } +- if (fd == -1) { +- g_printerr("Invalid vhost-user socket.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { +- g_printerr("Failed to initialize libvhost-user-glib.\n"); +- exit(EXIT_FAILURE); +- } +- +- loop = g_main_loop_new(NULL, FALSE); +- g_main_loop_run(loop); +- g_main_loop_unref(loop); +- +- vg_destroy(&g); +- if (g.drm_rnode_fd >= 0) { +- close(g.drm_rnode_fd); +- } +- 
+- return 0; +-} +diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c +new file mode 100644 +index 0000000..b45d201 +--- /dev/null ++++ b/contrib/vhost-user-gpu/vhost-user-gpu.c +@@ -0,0 +1,1191 @@ ++/* ++ * Virtio vhost-user GPU Device ++ * ++ * Copyright Red Hat, Inc. 2013-2018 ++ * ++ * Authors: ++ * Dave Airlie ++ * Gerd Hoffmann ++ * Marc-André Lureau ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/drm.h" ++#include "qapi/error.h" ++#include "qemu/sockets.h" ++ ++#include ++#include ++ ++#include "vugpu.h" ++#include "hw/virtio/virtio-gpu-bswap.h" ++#include "hw/virtio/virtio-gpu-pixman.h" ++#include "virgl.h" ++#include "vugbm.h" ++ ++enum { ++ VHOST_USER_GPU_MAX_QUEUES = 2, ++}; ++ ++struct virtio_gpu_simple_resource { ++ uint32_t resource_id; ++ uint32_t width; ++ uint32_t height; ++ uint32_t format; ++ struct iovec *iov; ++ unsigned int iov_cnt; ++ uint32_t scanout_bitmask; ++ pixman_image_t *image; ++ struct vugbm_buffer buffer; ++ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; ++}; ++ ++static gboolean opt_print_caps; ++static int opt_fdnum = -1; ++static char *opt_socket_path; ++static char *opt_render_node; ++static gboolean opt_virgl; ++ ++static void vg_handle_ctrl(VuDev *dev, int qidx); ++ ++static const char * ++vg_cmd_to_string(int cmd) ++{ ++#define CMD(cmd) [cmd] = #cmd ++ static const char *vg_cmd_str[] = { ++ CMD(VIRTIO_GPU_UNDEFINED), ++ ++ /* 2d commands */ ++ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), ++ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
++ ++ /* 3d commands */ ++ CMD(VIRTIO_GPU_CMD_CTX_CREATE), ++ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), ++ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), ++ ++ /* cursor commands */ ++ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), ++ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), ++ }; ++#undef REQ ++ ++ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { ++ return vg_cmd_str[cmd]; ++ } else { ++ return "unknown"; ++ } ++} ++ ++static int ++vg_sock_fd_read(int sock, void *buf, ssize_t buflen) ++{ ++ int ret; ++ ++ do { ++ ret = read(sock, buf, buflen); ++ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++static void ++vg_sock_fd_close(VuGpu *g) ++{ ++ if (g->sock_fd >= 0) { ++ close(g->sock_fd); ++ g->sock_fd = -1; ++ } ++} ++ ++static gboolean ++source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) ++{ ++ VuGpu *g = user_data; ++ ++ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { ++ return G_SOURCE_CONTINUE; ++ } ++ ++ /* resume */ ++ g->wait_ok = 0; ++ vg_handle_ctrl(&g->dev.parent, 0); ++ ++ return G_SOURCE_REMOVE; ++} ++ ++void ++vg_wait_ok(VuGpu *g) ++{ ++ assert(g->wait_ok == 0); ++ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, ++ source_wait_cb, g); ++} ++ ++static int ++vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) ++{ ++ ssize_t ret; ++ struct iovec iov = { ++ .iov_base = (void *)buf, ++ .iov_len = buflen, ++ }; ++ struct msghdr msg = { ++ .msg_iov = &iov, ++ .msg_iovlen = 1, ++ }; ++ union { ++ struct cmsghdr cmsghdr; ++ char control[CMSG_SPACE(sizeof(int))]; ++ } cmsgu; ++ struct cmsghdr *cmsg; ++ ++ if (fd != -1) { ++ msg.msg_control = cmsgu.control; ++ msg.msg_controllen = sizeof(cmsgu.control); ++ ++ cmsg = CMSG_FIRSTHDR(&msg); ++ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); ++ cmsg->cmsg_level = SOL_SOCKET; ++ cmsg->cmsg_type = SCM_RIGHTS; ++ ++ *((int *)CMSG_DATA(cmsg)) = fd; ++ } ++ ++ do { ++ ret = sendmsg(sock, &msg, 0); ++ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++void ++vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) ++{ ++ if (vg_sock_fd_write(vg->sock_fd, msg, ++ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { ++ vg_sock_fd_close(vg); ++ } ++} ++ ++bool ++vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, ++ gpointer payload) ++{ ++ uint32_t req, flags, size; ++ ++ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { ++ goto err; ++ } ++ ++ g_return_val_if_fail(req == expect_req, false); ++ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); ++ g_return_val_if_fail(size == expect_size, false); ++ ++ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { ++ goto err; ++ } ++ ++ return true; ++ ++err: ++ vg_sock_fd_close(g); ++ return false; ++} ++ ++static struct virtio_gpu_simple_resource * ++virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ QTAILQ_FOREACH(res, &g->reslist, next) { ++ if (res->resource_id == resource_id) { ++ return res; ++ } ++ } ++ return NULL; ++} ++ ++void ++vg_ctrl_response(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct virtio_gpu_ctrl_hdr *resp, ++ size_t resp_len) ++{ ++ size_t s; ++ ++ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { ++ resp->flags |= VIRTIO_GPU_FLAG_FENCE; ++ resp->fence_id = cmd->cmd_hdr.fence_id; ++ resp->ctx_id = cmd->cmd_hdr.ctx_id; ++ } ++ virtio_gpu_ctrl_hdr_bswap(resp); ++ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); ++ if (s != resp_len) { ++ g_critical("%s: response size incorrect %zu vs %zu", ++ 
__func__, s, resp_len); ++ } ++ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); ++ vu_queue_notify(&g->dev.parent, cmd->vq); ++ cmd->finished = true; ++} ++ ++void ++vg_ctrl_response_nodata(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ enum virtio_gpu_ctrl_type type) ++{ ++ struct virtio_gpu_ctrl_hdr resp = { ++ .type = type, ++ }; ++ ++ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); ++} ++ ++void ++vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_resp_display_info dpy_info = { {} }; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, ++ .size = 0, ++ }; ++ ++ assert(vg->wait_ok == 0); ++ ++ vg_send_msg(vg, &msg, -1); ++ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { ++ return; ++ } ++ ++ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); ++} ++ ++static void ++vg_resource_create_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ pixman_format_code_t pformat; ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_create_2d c2d; ++ ++ VUGPU_FILL_CMD(c2d); ++ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); ++ ++ if (c2d.resource_id == 0) { ++ g_critical("%s: resource id 0 is not allowed", __func__); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, c2d.resource_id); ++ if (res) { ++ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = g_new0(struct virtio_gpu_simple_resource, 1); ++ res->width = c2d.width; ++ res->height = c2d.height; ++ res->format = c2d.format; ++ res->resource_id = c2d.resource_id; ++ ++ pformat = virtio_gpu_get_pixman_format(c2d.format); ++ if (!pformat) { ++ g_critical("%s: host couldn't handle guest format %d", ++ __func__, c2d.format); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); ++ res->image = pixman_image_create_bits(pformat, ++ c2d.width, ++ c2d.height, ++ (uint32_t *)res->buffer.mmap, ++ res->buffer.stride); ++ if (!res->image) { ++ g_critical("%s: resource creation failed %d %d %d", ++ __func__, c2d.resource_id, c2d.width, c2d.height); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ QTAILQ_INSERT_HEAD(&g->reslist, res, next); ++} ++ ++static void ++vg_disable_scanout(VuGpu *g, int scanout_id) ++{ ++ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; ++ struct virtio_gpu_simple_resource *res; ++ ++ if (scanout->resource_id == 0) { ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (res) { ++ res->scanout_bitmask &= ~(1 << scanout_id); ++ } ++ ++ scanout->width = 0; ++ scanout->height = 0; ++ ++ if (g->sock_fd >= 0) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout.scanout_id = scanout_id, ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_destroy(VuGpu *g, ++ struct virtio_gpu_simple_resource *res) ++{ ++ int i; ++ ++ if (res->scanout_bitmask) { ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ if (res->scanout_bitmask & (1 << i)) { ++ vg_disable_scanout(g, i); ++ } ++ } ++ } ++ ++ vugbm_buffer_destroy(&res->buffer); ++ pixman_image_unref(res->image); ++ QTAILQ_REMOVE(&g->reslist, res, next); ++ g_free(res); ++} ++ ++static void ++vg_resource_unref(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_unref unref; ++ ++ VUGPU_FILL_CMD(unref); ++ virtio_gpu_bswap_32(&unref, sizeof(unref)); ++ ++ res = virtio_gpu_find_resource(g, unref.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, unref.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ 
return; ++ } ++ vg_resource_destroy(g, res); ++} ++ ++int ++vg_create_mapping_iov(VuGpu *g, ++ struct virtio_gpu_resource_attach_backing *ab, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct iovec **iov) ++{ ++ struct virtio_gpu_mem_entry *ents; ++ size_t esize, s; ++ int i; ++ ++ if (ab->nr_entries > 16384) { ++ g_critical("%s: nr_entries is too big (%d > 16384)", ++ __func__, ab->nr_entries); ++ return -1; ++ } ++ ++ esize = sizeof(*ents) * ab->nr_entries; ++ ents = g_malloc(esize); ++ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ sizeof(*ab), ents, esize); ++ if (s != esize) { ++ g_critical("%s: command data size incorrect %zu vs %zu", ++ __func__, s, esize); ++ g_free(ents); ++ return -1; ++ } ++ ++ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); ++ for (i = 0; i < ab->nr_entries; i++) { ++ uint64_t len = ents[i].length; ++ (*iov)[i].iov_len = ents[i].length; ++ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); ++ if (!(*iov)[i].iov_base || len != ents[i].length) { ++ g_critical("%s: resource %d element %d", ++ __func__, ab->resource_id, i); ++ g_free(*iov); ++ g_free(ents); ++ *iov = NULL; ++ return -1; ++ } ++ } ++ g_free(ents); ++ return 0; ++} ++ ++static void ++vg_resource_attach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_attach_backing ab; ++ int ret; ++ ++ VUGPU_FILL_CMD(ab); ++ virtio_gpu_bswap_32(&ab, sizeof(ab)); ++ ++ res = virtio_gpu_find_resource(g, ab.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ab.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); ++ if (ret != 0) { ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ return; ++ } ++ ++ res->iov_cnt = ab.nr_entries; ++} ++ ++static void ++vg_resource_detach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct 
virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_detach_backing detach; ++ ++ VUGPU_FILL_CMD(detach); ++ virtio_gpu_bswap_32(&detach, sizeof(detach)); ++ ++ res = virtio_gpu_find_resource(g, detach.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, detach.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ g_free(res->iov); ++ res->iov = NULL; ++ res->iov_cnt = 0; ++} ++ ++static void ++vg_transfer_to_host_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ int h; ++ uint32_t src_offset, dst_offset, stride; ++ int bpp; ++ pixman_format_code_t format; ++ struct virtio_gpu_transfer_to_host_2d t2d; ++ ++ VUGPU_FILL_CMD(t2d); ++ virtio_gpu_t2d_bswap(&t2d); ++ ++ res = virtio_gpu_find_resource(g, t2d.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, t2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (t2d.r.x > res->width || ++ t2d.r.y > res->height || ++ t2d.r.width > res->width || ++ t2d.r.height > res->height || ++ t2d.r.x + t2d.r.width > res->width || ++ t2d.r.y + t2d.r.height > res->height) { ++ g_critical("%s: transfer bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d", ++ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, ++ t2d.r.width, t2d.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ format = pixman_image_get_format(res->image); ++ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; ++ stride = pixman_image_get_stride(res->image); ++ ++ if (t2d.offset || t2d.r.x || t2d.r.y || ++ t2d.r.width != pixman_image_get_width(res->image)) { ++ void *img_data = pixman_image_get_data(res->image); ++ for (h = 0; h < t2d.r.height; h++) { ++ src_offset = t2d.offset + stride * h; ++ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); ++ 
++ iov_to_buf(res->iov, res->iov_cnt, src_offset, ++ img_data ++ + dst_offset, t2d.r.width * bpp); ++ } ++ } else { ++ iov_to_buf(res->iov, res->iov_cnt, 0, ++ pixman_image_get_data(res->image), ++ pixman_image_get_stride(res->image) ++ * pixman_image_get_height(res->image)); ++ } ++} ++ ++static void ++vg_set_scanout(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res, *ores; ++ struct virtio_gpu_scanout *scanout; ++ struct virtio_gpu_set_scanout ss; ++ int fd; ++ ++ VUGPU_FILL_CMD(ss); ++ virtio_gpu_bswap_32(&ss, sizeof(ss)); ++ ++ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { ++ g_critical("%s: illegal scanout id specified %d", ++ __func__, ss.scanout_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; ++ return; ++ } ++ ++ if (ss.resource_id == 0) { ++ vg_disable_scanout(g, ss.scanout_id); ++ return; ++ } ++ ++ /* create a surface for this scanout */ ++ res = virtio_gpu_find_resource(g, ss.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ss.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (ss.r.x > res->width || ++ ss.r.y > res->height || ++ ss.r.width > res->width || ++ ss.r.height > res->height || ++ ss.r.x + ss.r.width > res->width || ++ ss.r.y + ss.r.height > res->height) { ++ g_critical("%s: illegal scanout %d bounds for" ++ " resource %d, (%d,%d)+%d,%d vs %d %d", ++ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, ++ ss.r.width, ss.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ scanout = &g->scanout[ss.scanout_id]; ++ ++ ores = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (ores) { ++ ores->scanout_bitmask &= ~(1 << ss.scanout_id); ++ } ++ ++ res->scanout_bitmask |= (1 << ss.scanout_id); ++ scanout->resource_id = ss.resource_id; ++ scanout->x = ss.r.x; ++ scanout->y = ss.r.y; ++ scanout->width = ss.r.width; ++ scanout->height = 
ss.r.height; ++ ++ struct vugbm_buffer *buffer = &res->buffer; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_DMABUF_SCANOUT, ++ .size = sizeof(VhostUserGpuDMABUFScanout), ++ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { ++ .scanout_id = ss.scanout_id, ++ .x = ss.r.x, ++ .y = ss.r.y, ++ .width = ss.r.width, ++ .height = ss.r.height, ++ .fd_width = buffer->width, ++ .fd_height = buffer->height, ++ .fd_stride = buffer->stride, ++ .fd_drm_fourcc = buffer->format ++ } ++ }; ++ ++ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { ++ vg_send_msg(g, &msg, fd); ++ close(fd); ++ } ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout = (VhostUserGpuScanout) { ++ .scanout_id = ss.scanout_id, ++ .width = scanout->width, ++ .height = scanout->height ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_flush(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_flush rf; ++ pixman_region16_t flush_region; ++ int i; ++ ++ VUGPU_FILL_CMD(rf); ++ virtio_gpu_bswap_32(&rf, sizeof(rf)); ++ ++ res = virtio_gpu_find_resource(g, rf.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d\n", ++ __func__, rf.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (rf.r.x > res->width || ++ rf.r.y > res->height || ++ rf.r.width > res->width || ++ rf.r.height > res->height || ++ rf.r.x + rf.r.width > res->width || ++ rf.r.y + rf.r.height > res->height) { ++ g_critical("%s: flush bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d\n", ++ __func__, rf.resource_id, rf.r.x, rf.r.y, ++ rf.r.width, rf.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ pixman_region_init_rect(&flush_region, ++ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ struct virtio_gpu_scanout *scanout; ++ pixman_region16_t region, finalregion; ++ pixman_box16_t *extents; ++ ++ if (!(res->scanout_bitmask & (1 << i))) { ++ continue; ++ } ++ scanout = &g->scanout[i]; ++ ++ pixman_region_init(&finalregion); ++ pixman_region_init_rect(®ion, scanout->x, scanout->y, ++ scanout->width, scanout->height); ++ ++ pixman_region_intersect(&finalregion, &flush_region, ®ion); ++ ++ extents = pixman_region_extents(&finalregion); ++ size_t width = extents->x2 - extents->x1; ++ size_t height = extents->y2 - extents->y1; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { ++ VhostUserGpuMsg vmsg = { ++ .request = VHOST_USER_GPU_DMABUF_UPDATE, ++ .size = sizeof(VhostUserGpuUpdate), ++ .payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ } ++ }; ++ vg_send_msg(g, &vmsg, -1); ++ vg_wait_ok(g); ++ } else { ++ size_t bpp = ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; ++ size_t size = width * height * bpp; ++ ++ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + ++ sizeof(VhostUserGpuUpdate) + size); ++ VhostUserGpuMsg *msg = p; ++ msg->request = VHOST_USER_GPU_UPDATE; ++ msg->size = sizeof(VhostUserGpuUpdate) + size; ++ msg->payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ }; ++ pixman_image_t *i = ++ pixman_image_create_bits(pixman_image_get_format(res->image), ++ msg->payload.update.width, ++ msg->payload.update.height, ++ p + offsetof(VhostUserGpuMsg, ++ payload.update.data), ++ width * bpp); ++ pixman_image_composite(PIXMAN_OP_SRC, ++ res->image, NULL, i, ++ extents->x1, extents->y1, ++ 0, 0, 0, 0, ++ width, height); ++ pixman_image_unref(i); ++ vg_send_msg(g, msg, -1); ++ g_free(msg); ++ } ++ pixman_region_fini(®ion); ++ pixman_region_fini(&finalregion); ++ 
} ++ pixman_region_fini(&flush_region); ++} ++ ++static void ++vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ switch (cmd->cmd_hdr.type) { ++ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: ++ vg_get_display_info(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: ++ vg_resource_create_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_UNREF: ++ vg_resource_unref(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: ++ vg_resource_flush(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: ++ vg_transfer_to_host_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_SET_SCANOUT: ++ vg_set_scanout(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: ++ vg_resource_attach_backing(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: ++ vg_resource_detach_backing(vg, cmd); ++ break; ++ /* case VIRTIO_GPU_CMD_GET_EDID: */ ++ /* break */ ++ default: ++ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ break; ++ } ++ if (!cmd->finished) { ++ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : ++ VIRTIO_GPU_RESP_OK_NODATA); ++ } ++} ++ ++static void ++vg_handle_ctrl(VuDev *dev, int qidx) ++{ ++ VuGpu *vg = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ struct virtio_gpu_ctrl_command *cmd = NULL; ++ size_t len; ++ ++ for (;;) { ++ if (vg->wait_ok != 0) { ++ return; ++ } ++ ++ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); ++ if (!cmd) { ++ break; ++ } ++ cmd->vq = vq; ++ cmd->error = 0; ++ cmd->finished = false; ++ ++ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); ++ if (len != sizeof(cmd->cmd_hdr)) { ++ g_warning("%s: command size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cmd->cmd_hdr)); ++ } ++ ++ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); ++ g_debug("%d %s\n", cmd->cmd_hdr.type, ++ vg_cmd_to_string(cmd->cmd_hdr.type)); ++ ++ if (vg->virgl) { ++ vg_virgl_process_cmd(vg, cmd); ++ } else { ++ vg_process_cmd(vg, cmd); ++ } ++ ++ if (!cmd->finished) { ++ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); ++ vg->inflight++; ++ } else { ++ g_free(cmd); ++ } ++ } ++} ++ ++static void ++update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ res = virtio_gpu_find_resource(g, resource_id); ++ g_return_if_fail(res != NULL); ++ g_return_if_fail(pixman_image_get_width(res->image) == 64); ++ g_return_if_fail(pixman_image_get_height(res->image) == 64); ++ g_return_if_fail( ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); ++ ++ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); ++} ++ ++static void ++vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) ++{ ++ bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; ++ ++ g_debug("%s move:%d\n", G_STRFUNC, move); ++ ++ if (move) { ++ VhostUserGpuMsg msg = { ++ .request = cursor->resource_id ? 
++ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, ++ .size = sizeof(VhostUserGpuCursorPos), ++ .payload.cursor_pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_CURSOR_UPDATE, ++ .size = sizeof(VhostUserGpuCursorUpdate), ++ .payload.cursor_update = { ++ .pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ }, ++ .hot_x = cursor->hot_x, ++ .hot_y = cursor->hot_y, ++ } ++ }; ++ if (g->virgl) { ++ vg_virgl_update_cursor_data(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } else { ++ update_cursor_data_simple(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_handle_cursor(VuDev *dev, int qidx) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ VuVirtqElement *elem; ++ size_t len; ++ struct virtio_gpu_update_cursor cursor; ++ ++ for (;;) { ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); ++ ++ len = iov_to_buf(elem->out_sg, elem->out_num, ++ 0, &cursor, sizeof(cursor)); ++ if (len != sizeof(cursor)) { ++ g_warning("%s: cursor size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cursor)); ++ } else { ++ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); ++ vg_process_cursor_cmd(g, &cursor); ++ } ++ vu_queue_push(dev, vq, elem, 0); ++ vu_queue_notify(dev, vq); ++ g_free(elem); ++ } ++} ++ ++static void ++vg_panic(VuDev *dev, const char *msg) ++{ ++ g_critical("%s\n", msg); ++ exit(1); ++} ++ ++static void ++vg_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ ++ g_debug("queue started %d:%d\n", qidx, started); ++ ++ switch (qidx) { ++ case 0: ++ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); ++ break; ++ case 1: ++ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void ++set_gpu_protocol_features(VuGpu *g) ++{ ++ uint64_t u64; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES ++ }; ++ ++ assert(g->wait_ok == 0); ++ vg_send_msg(g, &msg, -1); ++ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { ++ return; ++ } ++ ++ msg = (VhostUserGpuMsg) { ++ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, ++ .size = sizeof(uint64_t), ++ .payload.u64 = 0 ++ }; ++ vg_send_msg(g, &msg, -1); ++} ++ ++static int ++vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ switch (msg->request) { ++ case VHOST_USER_GPU_SET_SOCKET: { ++ g_return_val_if_fail(msg->fd_num == 1, 1); ++ g_return_val_if_fail(g->sock_fd == -1, 1); ++ g->sock_fd = msg->fds[0]; ++ set_gpu_protocol_features(g); ++ return 1; ++ } ++ default: ++ return 0; ++ } ++ ++ return 0; ++} ++ ++static uint64_t ++vg_get_features(VuDev *dev) ++{ ++ uint64_t features = 0; ++ ++ if (opt_virgl) { ++ features |= 1 << VIRTIO_GPU_F_VIRGL; ++ } ++ ++ return features; ++} ++ ++static void ++vg_set_features(VuDev *dev, uint64_t features) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); ++ ++ if (virgl && !g->virgl_inited) { ++ if (!vg_virgl_init(g)) { ++ vg_panic(dev, "Failed to initialize virgl"); ++ } ++ g->virgl_inited = true; ++ } ++ ++ g->virgl = virgl; ++} ++ ++static int ++vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); ++ ++ if (opt_virgl) { ++ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); ++ } ++ ++ memcpy(config, &g->virtio_config, len); ++ ++ return 0; ++} ++ ++static int ++vg_set_config(VuDev *dev, const 
uint8_t *data, ++ uint32_t offset, uint32_t size, ++ uint32_t flags) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; ++ ++ if (config->events_clear) { ++ g->virtio_config.events_read &= ~config->events_clear; ++ } ++ ++ return 0; ++} ++ ++static const VuDevIface vuiface = { ++ .set_features = vg_set_features, ++ .get_features = vg_get_features, ++ .queue_set_started = vg_queue_set_started, ++ .process_msg = vg_process_msg, ++ .get_config = vg_get_config, ++ .set_config = vg_set_config, ++}; ++ ++static void ++vg_destroy(VuGpu *g) ++{ ++ struct virtio_gpu_simple_resource *res, *tmp; ++ ++ vug_deinit(&g->dev); ++ ++ vg_sock_fd_close(g); ++ ++ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { ++ vg_resource_destroy(g, res); ++ } ++ ++ vugbm_device_destroy(&g->gdev); ++} ++ ++static GOptionEntry entries[] = { ++ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, ++ "Print capabilities", NULL }, ++ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, ++ "Use inherited fd socket", "FDNUM" }, ++ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, ++ "Use UNIX socket path", "PATH" }, ++ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, ++ "Specify DRM render node", "PATH" }, ++ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, ++ "Turn virgl rendering on", NULL }, ++ { NULL, } ++}; ++ ++int ++main(int argc, char *argv[]) ++{ ++ GOptionContext *context; ++ GError *error = NULL; ++ GMainLoop *loop = NULL; ++ int fd; ++ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; ++ ++ QTAILQ_INIT(&g.reslist); ++ QTAILQ_INIT(&g.fenceq); ++ ++ context = g_option_context_new("QEMU vhost-user-gpu"); ++ g_option_context_add_main_entries(context, entries, NULL); ++ if (!g_option_context_parse(context, &argc, &argv, &error)) { ++ g_printerr("Option parsing failed: %s\n", error->message); ++ exit(EXIT_FAILURE); ++ } ++ g_option_context_free(context); ++ ++ if (opt_print_caps) 
{ ++ g_print("{\n"); ++ g_print(" \"type\": \"gpu\",\n"); ++ g_print(" \"features\": [\n"); ++ g_print(" \"render-node\",\n"); ++ g_print(" \"virgl\"\n"); ++ g_print(" ]\n"); ++ g_print("}\n"); ++ exit(EXIT_SUCCESS); ++ } ++ ++ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); ++ if (opt_render_node && g.drm_rnode_fd == -1) { ++ g_printerr("Failed to open DRM rendernode.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (g.drm_rnode_fd >= 0) { ++ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { ++ g_warning("Failed to init DRM device, using fallback path"); ++ } ++ } ++ ++ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { ++ g_printerr("Please specify either --fd or --socket-path\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (opt_socket_path) { ++ int lsock = unix_listen(opt_socket_path, &error_fatal); ++ if (lsock < 0) { ++ g_printerr("Failed to listen on %s.\n", opt_socket_path); ++ exit(EXIT_FAILURE); ++ } ++ fd = accept(lsock, NULL, NULL); ++ close(lsock); ++ } else { ++ fd = opt_fdnum; ++ } ++ if (fd == -1) { ++ g_printerr("Invalid vhost-user socket.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { ++ g_printerr("Failed to initialize libvhost-user-glib.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ loop = g_main_loop_new(NULL, FALSE); ++ g_main_loop_run(loop); ++ g_main_loop_unref(loop); ++ ++ vg_destroy(&g); ++ if (g.drm_rnode_fd >= 0) { ++ close(g.drm_rnode_fd); ++ } ++ ++ return 0; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch b/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch new file mode 100644 index 0000000..e000534 --- /dev/null +++ b/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch @@ -0,0 +1,50 @@ +From 8f9f4d8d52ebb7878543ac0b84cc372477041e33 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Wed, 1 Apr 2020 16:13:50 -0400 +Subject: [PATCH 2/2] compat: disable 'edid' for virtio-gpu-ccw + +RH-Author: Cornelia Huck +Message-id: 
<20200401161350.20462-1-cohuck@redhat.com> +Patchwork-id: 94523 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2] compat: disable 'edid' for virtio-gpu-ccw +Bugzilla: 1816793 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Markus Armbruster +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816793 +Branch: rhel-av-8.2.1 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27629804 +Upstream: downstream only +Tested: verified that for a virtio-gpu-ccw device 'edid' is false with + a s390-ccw-virtio-rhel7.6.0 machine and true with a + s390-ccw-virtio-rhel8.2.0 (s390x does not have the 8.0 or 8.1 + machine types) + +hw_compat_rhel_8_0 copied the original upstream version of +disabling 'edid' for virtio-gpu-pci only (not following later +changes). Switch it to virtio-gpu-device, following upstream +02501fc39381 ("compat: disable edid on correct virtio-gpu device"). + +Signed-off-by: Cornelia Huck +Signed-off-by: Jon Maloy +--- + hw/core/machine.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index e0e0eec8bf..5a025d1af2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -72,7 +72,7 @@ GlobalProperty hw_compat_rhel_8_0[] = { + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ +- { "virtio-gpu-pci", "edid", "false" }, ++ { "virtio-gpu-device", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ +-- +2.18.2 + diff --git a/SOURCES/kvm-config-enable-VFIO_CCW.patch b/SOURCES/kvm-config-enable-VFIO_CCW.patch new file mode 100644 index 0000000..44af9cf --- /dev/null +++ b/SOURCES/kvm-config-enable-VFIO_CCW.patch @@ -0,0 +1,39 @@ +From f3e80771c921560a58c30020781fa01a54be8eb0 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:43 
-0400 +Subject: [PATCH 09/12] config: enable VFIO_CCW + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-10-cohuck@redhat.com> +Patchwork-id: 97699 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 9/9] config: enable VFIO_CCW +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +Enable vfio-ccw in RHEL builds. + +Upstream: n/a + +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + default-configs/s390x-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +index c3c73fe752..08a15f3e01 100644 +--- a/default-configs/s390x-rh-devices.mak ++++ b/default-configs/s390x-rh-devices.mak +@@ -9,6 +9,7 @@ CONFIG_SCSI=y + CONFIG_TERMINAL3270=y + CONFIG_VFIO=y + CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y + CONFIG_VFIO_PCI=y + CONFIG_VHOST_USER=y + CONFIG_VIRTIO_CCW=y +-- +2.27.0 + diff --git a/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch new file mode 100644 index 0000000..4212f1c --- /dev/null +++ b/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch @@ -0,0 +1,134 @@ +From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:44 +0100 +Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-70-dgilbert@redhat.com> +Patchwork-id: 93523 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. 
David Alan Gilbert" + +In future patches we'll be performing commands on the slave-fd driven +by commands on queues, since those queues will be driven by individual +threads we need to make sure they don't attempt to use the slave-fd +for multiple commands in parallel. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index ec27b78..63e4106 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) + return vu_message_write(dev, conn_fd, vmsg); + } + ++/* ++ * Processes a reply on the slave channel. ++ * Entered with slave_mutex held and releases it before exit. ++ * Returns true on success. ++ */ + static bool + vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) + { + VhostUserMsg msg_reply; ++ bool result = false; + + if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { +- return true; ++ result = true; ++ goto out; + } + + if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { +- return false; ++ goto out; + } + + if (msg_reply.request != vmsg->request) { + DPRINT("Received unexpected msg type. Expected %d received %d", + vmsg->request, msg_reply.request); +- return false; ++ goto out; + } + +- return msg_reply.payload.u64 == 0; ++ result = msg_reply.payload.u64 == 0; ++ ++out: ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return result; + } + + /* Kick the log_call_fd if required. 
*/ +@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + return false; + } + ++ pthread_mutex_lock(&dev->slave_mutex); + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); + return false; + } + ++ /* Also unlocks the slave_mutex */ + return vu_process_message_reply(dev, &vmsg); + } + +@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) + close(dev->slave_fd); + dev->slave_fd = -1; + } ++ pthread_mutex_destroy(&dev->slave_mutex); + + if (dev->sock != -1) { + close(dev->sock); +@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, + dev->remove_watch = remove_watch; + dev->iface = iface; + dev->log_call_fd = -1; ++ pthread_mutex_init(&dev->slave_mutex, NULL); + dev->slave_fd = -1; + dev->max_queues = max_queues; + +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 46b6007..1844b6f 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include "standard-headers/linux/virtio_ring.h" + + /* Based on qemu/hw/virtio/vhost-user.c */ +@@ -355,6 +356,8 @@ struct VuDev { + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; ++ /* Must be held while using slave_fd */ ++ pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch b/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch new file mode 100644 index 0000000..891b866 --- /dev/null +++ b/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch @@ -0,0 +1,98 @@ +From 043decff5812c1f46ed44dd0f82099e3b8bb6a28 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Sun, 31 May 2020 16:40:35 +0100 +Subject: [PATCH 7/7] crypto.c: cleanup created file when + block_crypto_co_create_opts_luks fails + +RH-Author: Maxim Levitsky +Message-id: 
<20200531164035.34188-4-mlevitsk@redhat.com> +Patchwork-id: 97060 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 3/3] crypto.c: cleanup created file when block_crypto_co_create_opts_luks fails +Bugzilla: 1827630 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: John Snow +RH-Acked-by: Eric Blake + +From: Daniel Henrique Barboza + +When using a non-UTF8 secret to create a volume using qemu-img, the +following error happens: + +$ qemu-img create -f luks --object secret,id=vol_1_encrypt0,file=vol_resize_pool.vol_1.secret.qzVQrI -o key-secret=vol_1_encrypt0 /var/tmp/pool_target/vol_1 10240K + +Formatting '/var/tmp/pool_target/vol_1', fmt=luks size=10485760 key-secret=vol_1_encrypt0 +qemu-img: /var/tmp/pool_target/vol_1: Data from secret vol_1_encrypt0 is not valid UTF-8 + +However, the created file '/var/tmp/pool_target/vol_1' is left behind in the +file system after the failure. This behavior can be observed when creating +the volume using Libvirt, via 'virsh vol-create', and then getting "volume +target path already exist" errors when trying to re-create the volume. + +The volume file is created inside block_crypto_co_create_opts_luks(), in +block/crypto.c. If the bdrv_create_file() call is successful but any +succeeding step fails*, the existing 'fail' label does not take into +account the created file, leaving it behind. + +This patch changes block_crypto_co_create_opts_luks() to delete +'filename' in case of failure. A failure in this point means that +the volume is now truncated/corrupted, so even if 'filename' was an +existing volume before calling qemu-img, it is now unusable. Deleting +the file it is not much worse than leaving it in the filesystem in +this scenario, and we don't have to deal with checking the file +pre-existence in the code. + +* in our case, block_crypto_co_create_generic calls qcrypto_block_create, +which calls qcrypto_block_luks_create, and this function fails when +calling qcrypto_secret_lookup_as_utf8. 
+ +Reported-by: Srikanth Aithal +Suggested-by: Kevin Wolf +Signed-off-by: Daniel Henrique Barboza +Message-Id: <20200130213907.2830642-4-danielhb413@gmail.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1bba30da24e1124ceeb0693c81382a0d77e20ca5) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/crypto.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/block/crypto.c b/block/crypto.c +index 970d463..5e3b15c 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -30,6 +30,7 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "qemu/option.h" ++#include "qemu/cutils.h" + #include "crypto.h" + + typedef struct BlockCrypto BlockCrypto; +@@ -597,6 +598,23 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, + + ret = 0; + fail: ++ /* ++ * If an error occurred, delete 'filename'. Even if the file existed ++ * beforehand, it has been truncated and corrupted in the process. ++ */ ++ if (ret && bs) { ++ Error *local_delete_err = NULL; ++ int r_del = bdrv_co_delete_file(bs, &local_delete_err); ++ /* ++ * ENOTSUP will happen if the block driver doesn't support ++ * the 'bdrv_co_delete_file' interface. This is a predictable ++ * scenario and shouldn't be reported back to the user. 
++ */ ++ if ((r_del < 0) && (r_del != -ENOTSUP)) { ++ error_report_err(local_delete_err); ++ } ++ } ++ + bdrv_unref(bs); + qapi_free_QCryptoBlockCreateOptions(create_opts); + qobject_unref(cryptoopts); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch new file mode 100644 index 0000000..a6177c6 --- /dev/null +++ b/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch @@ -0,0 +1,56 @@ +From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:58 +0000 +Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment + clearer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-3-drjones@redhat.com> +Patchwork-id: 93772 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Philippe Mathieu-Daudé +Date: Fri, 07 Feb 2020 14:04:28 +0000 + + docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer + + The bold text sounds like 'knock knock'. Only bolding the + second 'not' makes it easier to read. + + Fixes: dea101a1ae + Signed-off-by: Philippe Mathieu-Daudé + Reviewed-by: Andrew Jones + Message-id: 20200206225148.23923-1-philmd@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/arm-cpu-features.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 45d1eb6..48d5054 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. + + kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This + means that by default the virtual time +- adjustment is enabled (vtime is *not not* ++ adjustment is enabled (vtime is not *not* + adjusted). + + When virtual time adjustment is enabled each +-- +1.8.3.1 + diff --git a/SOURCES/kvm-enable-ramfb.patch b/SOURCES/kvm-enable-ramfb.patch new file mode 100644 index 0000000..fa2fe11 --- /dev/null +++ b/SOURCES/kvm-enable-ramfb.patch @@ -0,0 +1,72 @@ +From 441128e2f13a56d4949b70971edd2f6902772959 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 3 Jun 2020 15:15:56 +0100 +Subject: [PATCH 01/17] enable ramfb + +RH-Author: Gerd Hoffmann +Message-id: <20200603151556.1195-2-kraxel@redhat.com> +Patchwork-id: 97097 +O-Subject: [RHEL-AV-8.2.0.z qemu-kvm PATCH 1/1] enable ramfb +Bugzilla: 1841068 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi + +--- + hw/vfio/pci.c | 5 ----- + hw/display/Makefile.objs | 5 ++--- + 2 files changed, 2 insertions(+), 8 deletions(-) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/display/Makefile.objs | 5 ++--- + hw/vfio/pci.c | 5 ----- + 2 files changed, 2 insertions(+), 8 deletions(-) + +diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs +index 3d0cda1..f2182e3 100644 +--- a/hw/display/Makefile.objs ++++ b/hw/display/Makefile.objs +@@ -1,9 +1,8 @@ + common-obj-$(CONFIG_DDC) += i2c-ddc.o + common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o + +-# Disabled for Red Hat Enterprise Linux +-#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o +-#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o ++common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o ++common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o + + common-obj-$(CONFIG_ADS7846) += ads7846.o + common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d717520..f191904 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3249,7 +3249,6 @@ static const TypeInfo vfio_pci_dev_info = { + }, + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static Property vfio_pci_dev_nohotplug_properties[] = { + DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_END_OF_LIST(), +@@ -3269,15 +3268,11 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { + .instance_size = sizeof(VFIOPCIDevice), + .class_init = vfio_pci_nohotplug_dev_class_init, + }; +-#endif + + static void register_vfio_pci_dev_type(void) + { + type_register_static(&vfio_pci_dev_info); +- +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + type_register_static(&vfio_pci_nohotplug_dev_info); +-#endif + } + + type_init(register_vfio_pci_dev_type) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-error-Document-Error-API-usage-rules.patch b/SOURCES/kvm-error-Document-Error-API-usage-rules.patch new file mode 100644 index 0000000..026b81a --- /dev/null +++ b/SOURCES/kvm-error-Document-Error-API-usage-rules.patch @@ -0,0 +1,156 @@ +From d931195ef5cccd6a4e6fceeba37809b1712c97ad Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: 
Fri, 8 Jan 2021 07:40:55 -0500 +Subject: [PATCH 04/10] error: Document Error API usage rules +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-5-marcandre.lureau@redhat.com> +Patchwork-id: 100523 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 04/10] error: Document Error API usage rules +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Markus Armbruster + +This merely codifies existing practice, with one exception: the rule +advising against returning void, where existing practice is mixed. + +When the Error API was created, we adopted the (unwritten) rule to +return void when the function returns no useful value on success, +unlike GError, which recommends to return true on success and false on +error then. + +When a function returns a distinct error value, say false, a checked +call that passes the error up looks like + + if (!frobnicate(..., errp)) { + handle the error... + } + +When it returns void, we need + + Error *err = NULL; + + frobnicate(..., &err); + if (err) { + handle the error... + error_propagate(errp, err); + } + +Not only is this more verbose, it also creates an Error object even +when @errp is null, &error_abort or &error_fatal. + +People got tired of the additional boilerplate, and started to ignore +the unwritten rule. The result is confusion among developers about +the preferred usage. + +Make the rule advising against returning void official by putting it +in writing. This will hopefully reduce confusion. + +Update the examples accordingly. + +The remainder of this series will update a substantial amount of code +to honor the rule. 
+ +Signed-off-by: Markus Armbruster +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Greg Kurz +Message-Id: <20200707160613.848843-4-armbru@redhat.com> + +(cherry picked from commit e3fe3988d7851cac30abffae06d2f555ff7bee62) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 52 +++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 46 insertions(+), 6 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 3351fe76368..08d48e74836 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -15,6 +15,33 @@ + /* + * Error reporting system loosely patterned after Glib's GError. + * ++ * = Rules = ++ * ++ * - Functions that use Error to report errors have an Error **errp ++ * parameter. It should be the last parameter, except for functions ++ * taking variable arguments. ++ * ++ * - You may pass NULL to not receive the error, &error_abort to abort ++ * on error, &error_fatal to exit(1) on error, or a pointer to a ++ * variable containing NULL to receive the error. ++ * ++ * - Separation of concerns: the function is responsible for detecting ++ * errors and failing cleanly; handling the error is its caller's ++ * job. Since the value of @errp is about handling the error, the ++ * function should not examine it. ++ * ++ * - On success, the function should not touch *errp. On failure, it ++ * should set a new error, e.g. with error_setg(errp, ...), or ++ * propagate an existing one, e.g. with error_propagate(errp, ...). ++ * ++ * - Whenever practical, also return a value that indicates success / ++ * failure. This can make the error checking more concise, and can ++ * avoid useless error object creation and destruction. Note that ++ * we still have many functions returning void. 
We recommend ++ * • bool-valued functions return true on success / false on failure, ++ * • pointer-valued functions return non-null / null pointer, and ++ * • integer-valued functions return non-negative / negative. ++ * + * = Creating errors = + * + * Create an error: +@@ -95,14 +122,13 @@ + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); + * +- * Call a function and receive an error from it: +- * Error *err = NULL; +- * foo(arg, &err); +- * if (err) { ++ * Call a function, receive an error from it, and pass it to the caller ++ * - when the function returns a value that indicates failure, say ++ * false: ++ * if (!foo(arg, errp)) { + * handle the error... + * } +- * +- * Receive an error and pass it on to the caller: ++ * - when it does not, say because it is a void function: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { +@@ -120,6 +146,20 @@ + * foo(arg, errp); + * for readability. + * ++ * Receive an error, and handle it locally ++ * - when the function returns a value that indicates failure, say ++ * false: ++ * Error *err = NULL; ++ * if (!foo(arg, &err)) { ++ * handle the error... ++ * } ++ * - when it does not, say because it is a void function: ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * if (err) { ++ * handle the error... 
++ * } ++ * + * Receive and accumulate multiple errors (first one wins): + * Error *err = NULL, *local_err = NULL; + * foo(arg, &err); +-- +2.27.0 + diff --git a/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch b/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch new file mode 100644 index 0000000..62720d5 --- /dev/null +++ b/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch @@ -0,0 +1,87 @@ +From 393a5e9b24947f90cd116c4fb1b2ff4ee200f0df Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:53 -0500 +Subject: [PATCH 02/10] error: Fix examples in error.h's big comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-3-marcandre.lureau@redhat.com> +Patchwork-id: 100521 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 02/10] error: Fix examples in error.h's big comment +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Markus Armbruster + +Mark a bad example more clearly. Fix the error_propagate_prepend() +example. Add a missing declaration and a second error pileup example. + +Signed-off-by: Markus Armbruster +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Greg Kurz +Message-Id: <20200707160613.848843-2-armbru@redhat.com> + +(cherry picked from commit 47ff5ac81e8bb3096500de7b132051691d533d36) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + include/qapi/error.h | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 3f95141a01a..83c38f9a188 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -24,7 +24,7 @@ + * "charm, top, bottom.\n"); + * + * Do *not* contract this to +- * error_setg(&err, "invalid quark\n" ++ * error_setg(&err, "invalid quark\n" // WRONG! + * "Valid quarks are up, down, strange, charm, top, bottom."); + * + * Report an error to the current monitor if we have one, else stderr: +@@ -52,7 +52,8 @@ + * where Error **errp is a parameter, by convention the last one. + * + * Pass an existing error to the caller with the message modified: +- * error_propagate_prepend(errp, err); ++ * error_propagate_prepend(errp, err, ++ * "Could not frobnicate '%s': ", name); + * + * Avoid + * error_propagate(errp, err); +@@ -108,12 +109,23 @@ + * } + * + * Do *not* "optimize" this to ++ * Error *err = NULL; + * foo(arg, &err); + * bar(arg, &err); // WRONG! + * if (err) { + * handle the error... + * } + * because this may pass a non-null err to bar(). ++ * ++ * Likewise, do *not* ++ * Error *err = NULL; ++ * if (cond1) { ++ * error_setg(&err, ...); ++ * } ++ * if (cond2) { ++ * error_setg(&err, ...); // WRONG! ++ * } ++ * because this may pass a non-null err to error_setg(). 
+ */ + + #ifndef ERROR_H +-- +2.27.0 + diff --git a/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch b/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch new file mode 100644 index 0000000..aa5797d --- /dev/null +++ b/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch @@ -0,0 +1,148 @@ +From b41fc6d57fae80ac2a431bca22862985f003fe88 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:54 -0500 +Subject: [PATCH 03/10] error: Improve error.h's big comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-4-marcandre.lureau@redhat.com> +Patchwork-id: 100522 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 03/10] error: Improve error.h's big comment +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Markus Armbruster + +Add headlines to the big comment. + +Explain examples for NULL, &error_abort and &error_fatal argument +better. + +Tweak rationale for error_propagate_prepend(). + +Signed-off-by: Markus Armbruster +Message-Id: <20200707160613.848843-3-armbru@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Greg Kurz + +(cherry picked from commit 9aac7d486cc792191c25c30851f501624b0c2751) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 51 +++++++++++++++++++++++++++++++------------- + 1 file changed, 36 insertions(+), 15 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 83c38f9a188..3351fe76368 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -15,6 +15,8 @@ + /* + * Error reporting system loosely patterned after Glib's GError. + * ++ * = Creating errors = ++ * + * Create an error: + * error_setg(&err, "situation normal, all fouled up"); + * +@@ -27,6 +29,8 @@ + * error_setg(&err, "invalid quark\n" // WRONG! 
+ * "Valid quarks are up, down, strange, charm, top, bottom."); + * ++ * = Reporting and destroying errors = ++ * + * Report an error to the current monitor if we have one, else stderr: + * error_report_err(err); + * This frees the error object. +@@ -40,6 +44,30 @@ + * error_free(err); + * Note that this loses hints added with error_append_hint(). + * ++ * Call a function ignoring errors: ++ * foo(arg, NULL); ++ * This is more concise than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * error_free(err); // don't do this ++ * ++ * Call a function aborting on errors: ++ * foo(arg, &error_abort); ++ * This is more concise and fails more nicely than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * assert(!err); // don't do this ++ * ++ * Call a function treating errors as fatal: ++ * foo(arg, &error_fatal); ++ * This is more concise than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * if (err) { // don't do this ++ * error_report_err(err); ++ * exit(1); ++ * } ++ * + * Handle an error without reporting it (just for completeness): + * error_free(err); + * +@@ -47,6 +75,11 @@ + * reporting it (primarily useful in testsuites): + * error_free_or_abort(&err); + * ++ * = Passing errors around = ++ * ++ * Errors get passed to the caller through the conventional @errp ++ * parameter. ++ * + * Pass an existing error to the caller: + * error_propagate(errp, err); + * where Error **errp is a parameter, by convention the last one. +@@ -54,11 +87,10 @@ + * Pass an existing error to the caller with the message modified: + * error_propagate_prepend(errp, err, + * "Could not frobnicate '%s': ", name); +- * +- * Avoid +- * error_propagate(errp, err); ++ * This is more concise than ++ * error_propagate(errp, err); // don't do this + * error_prepend(errp, "Could not frobnicate '%s': ", name); +- * because this fails to prepend when @errp is &error_fatal. ++ * and works even when @errp is &error_fatal. 
+ * + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); +@@ -70,15 +102,6 @@ + * handle the error... + * } + * +- * Call a function ignoring errors: +- * foo(arg, NULL); +- * +- * Call a function aborting on errors: +- * foo(arg, &error_abort); +- * +- * Call a function treating errors as fatal: +- * foo(arg, &error_fatal); +- * + * Receive an error and pass it on to the caller: + * Error *err = NULL; + * foo(arg, &err); +@@ -86,8 +109,6 @@ + * handle the error... + * error_propagate(errp, err); + * } +- * where Error **errp is a parameter, by convention the last one. +- * + * Do *not* "optimize" this to + * foo(arg, errp); + * if (*errp) { // WRONG! +-- +2.27.0 + diff --git a/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch b/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch new file mode 100644 index 0000000..bfcece6 --- /dev/null +++ b/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch @@ -0,0 +1,307 @@ +From f6ac3d6bab961c31060d722af23beeb50ce5bdde Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:56 -0500 +Subject: [PATCH 05/10] error: New macro ERRP_GUARD() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-6-marcandre.lureau@redhat.com> +Patchwork-id: 100524 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 05/10] error: New macro ERRP_GUARD() +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Vladimir Sementsov-Ogievskiy + +Introduce a new ERRP_GUARD() macro, to be used at start of functions +with an errp OUT parameter. + +It has three goals: + +1. Fix issue with error_fatal and error_prepend/error_append_hint: the +user can't see this additional information, because exit() happens in +error_setg earlier than information is added. [Reported by Greg Kurz] + +2. 
Fix issue with error_abort and error_propagate: when we wrap +error_abort by local_err+error_propagate, the resulting coredump will +refer to error_propagate and not to the place where error happened. +(the macro itself doesn't fix the issue, but it allows us to [3.] drop +the local_err+error_propagate pattern, which will definitely fix the +issue) [Reported by Kevin Wolf] + +3. Drop local_err+error_propagate pattern, which is used to work around +void functions with errp parameter, when caller wants to know resulting +status. (Note: actually these functions could be merely updated to +return int error code). + +To achieve these goals, later patches will add invocations +of this macro at the start of functions which either use +error_prepend/error_append_hint (solving 1) or which use +local_err+error_propagate to check errors, switching those +functions to use *errp instead (solving 2 and 3). + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Paul Durrant +Reviewed-by: Greg Kurz +Reviewed-by: Eric Blake +[Merge comments properly with recent commit "error: Document Error API +usage rules", and edit for clarity. Put ERRP_AUTO_PROPAGATE() before +its helpers, and touch up style. Tweak commit message.] +Signed-off-by: Markus Armbruster +Message-Id: <20200707165037.1026246-2-armbru@redhat.com> + +(cherry picked from commit ae7c80a7bd73685437bf6ba9d7c26098351f4166) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 158 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 139 insertions(+), 19 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 08d48e74836..e658790acfc 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -30,6 +30,10 @@ + * job. Since the value of @errp is about handling the error, the + * function should not examine it. + * ++ * - The function may pass @errp to functions it calls to pass on ++ * their errors to its caller.
If it dereferences @errp to check ++ * for errors, it must use ERRP_GUARD(). ++ * + * - On success, the function should not touch *errp. On failure, it + * should set a new error, e.g. with error_setg(errp, ...), or + * propagate an existing one, e.g. with error_propagate(errp, ...). +@@ -45,15 +49,17 @@ + * = Creating errors = + * + * Create an error: +- * error_setg(&err, "situation normal, all fouled up"); ++ * error_setg(errp, "situation normal, all fouled up"); ++ * where @errp points to the location to receive the error. + * + * Create an error and add additional explanation: +- * error_setg(&err, "invalid quark"); +- * error_append_hint(&err, "Valid quarks are up, down, strange, " ++ * error_setg(errp, "invalid quark"); ++ * error_append_hint(errp, "Valid quarks are up, down, strange, " + * "charm, top, bottom.\n"); ++ * This may require use of ERRP_GUARD(); more on that below. + * + * Do *not* contract this to +- * error_setg(&err, "invalid quark\n" // WRONG! ++ * error_setg(errp, "invalid quark\n" // WRONG! + * "Valid quarks are up, down, strange, charm, top, bottom."); + * + * = Reporting and destroying errors = +@@ -107,18 +113,6 @@ + * Errors get passed to the caller through the conventional @errp + * parameter. + * +- * Pass an existing error to the caller: +- * error_propagate(errp, err); +- * where Error **errp is a parameter, by convention the last one. +- * +- * Pass an existing error to the caller with the message modified: +- * error_propagate_prepend(errp, err, +- * "Could not frobnicate '%s': ", name); +- * This is more concise than +- * error_propagate(errp, err); // don't do this +- * error_prepend(errp, "Could not frobnicate '%s': ", name); +- * and works even when @errp is &error_fatal. +- * + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); + * +@@ -129,18 +123,26 @@ + * handle the error... 
+ * } + * - when it does not, say because it is a void function: ++ * ERRP_GUARD(); ++ * foo(arg, errp); ++ * if (*errp) { ++ * handle the error... ++ * } ++ * More on ERRP_GUARD() below. ++ * ++ * Code predating ERRP_GUARD() still exists, and looks like this: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { + * handle the error... +- * error_propagate(errp, err); ++ * error_propagate(errp, err); // deprecated + * } +- * Do *not* "optimize" this to ++ * Avoid in new code. Do *not* "optimize" it to + * foo(arg, errp); + * if (*errp) { // WRONG! + * handle the error... + * } +- * because errp may be NULL! ++ * because errp may be NULL without the ERRP_GUARD() guard. + * + * But when all you do with the error is pass it on, please use + * foo(arg, errp); +@@ -160,6 +162,19 @@ + * handle the error... + * } + * ++ * Pass an existing error to the caller: ++ * error_propagate(errp, err); ++ * This is rarely needed. When @err is a local variable, use of ++ * ERRP_GUARD() commonly results in more readable code. ++ * ++ * Pass an existing error to the caller with the message modified: ++ * error_propagate_prepend(errp, err, ++ * "Could not frobnicate '%s': ", name); ++ * This is more concise than ++ * error_propagate(errp, err); // don't do this ++ * error_prepend(errp, "Could not frobnicate '%s': ", name); ++ * and works even when @errp is &error_fatal. ++ * + * Receive and accumulate multiple errors (first one wins): + * Error *err = NULL, *local_err = NULL; + * foo(arg, &err); +@@ -187,6 +202,69 @@ + * error_setg(&err, ...); // WRONG! + * } + * because this may pass a non-null err to error_setg(). ++ * ++ * = Why, when and how to use ERRP_GUARD() = ++ * ++ * Without ERRP_GUARD(), use of the @errp parameter is restricted: ++ * - It must not be dereferenced, because it may be null. ++ * - It should not be passed to error_prepend() or ++ * error_append_hint(), because that doesn't work with &error_fatal. ++ * ERRP_GUARD() lifts these restrictions. 
++ * ++ * To use ERRP_GUARD(), add it right at the beginning of the function. ++ * @errp can then be used without worrying about the argument being ++ * NULL or &error_fatal. ++ * ++ * Using it when it's not needed is safe, but please avoid cluttering ++ * the source with useless code. ++ * ++ * = Converting to ERRP_GUARD() = ++ * ++ * To convert a function to use ERRP_GUARD(): ++ * ++ * 0. If the Error ** parameter is not named @errp, rename it to ++ * @errp. ++ * ++ * 1. Add an ERRP_GUARD() invocation, by convention right at the ++ * beginning of the function. This makes @errp safe to use. ++ * ++ * 2. Replace &err by errp, and err by *errp. Delete local variable ++ * @err. ++ * ++ * 3. Delete error_propagate(errp, *errp), replace ++ * error_propagate_prepend(errp, *errp, ...) by error_prepend(errp, ...) ++ * ++ * 4. Ensure @errp is valid at return: when you destroy *errp, set ++ * *errp = NULL. ++ * ++ * Example: ++ * ++ * bool fn(..., Error **errp) ++ * { ++ * Error *err = NULL; ++ * ++ * foo(arg, &err); ++ * if (err) { ++ * handle the error... ++ * error_propagate(errp, err); ++ * return false; ++ * } ++ * ... ++ * } ++ * ++ * becomes ++ * ++ * bool fn(..., Error **errp) ++ * { ++ * ERRP_GUARD(); ++ * ++ * foo(arg, errp); ++ * if (*errp) { ++ * handle the error... ++ * return false; ++ * } ++ * ... ++ * } + */ + + #ifndef ERROR_H +@@ -287,6 +365,7 @@ void error_setg_win32_internal(Error **errp, + * the error object. + * Else, move the error object from @local_err to *@dst_errp. + * On return, @local_err is invalid. ++ * Please use ERRP_GUARD() instead when possible. + * Please don't error_propagate(&error_fatal, ...), use + * error_report_err() and exit(), because that's more obvious. + */ +@@ -298,6 +377,7 @@ void error_propagate(Error **dst_errp, Error *local_err); + * Behaves like + * error_prepend(&local_err, fmt, ...); + * error_propagate(dst_errp, local_err); ++ * Please use ERRP_GUARD() and error_prepend() instead when possible.
+ */ + void error_propagate_prepend(Error **dst_errp, Error *local_err, + const char *fmt, ...); +@@ -395,6 +475,46 @@ void error_set_internal(Error **errp, + ErrorClass err_class, const char *fmt, ...) + GCC_FMT_ATTR(6, 7); + ++/* ++ * Make @errp parameter easier to use regardless of argument value ++ * ++ * This macro is for use right at the beginning of a function that ++ * takes an Error **errp parameter to pass errors to its caller. The ++ * parameter must be named @errp. ++ * ++ * It must be used when the function dereferences @errp or passes ++ * @errp to error_prepend(), error_vprepend(), or error_append_hint(). ++ * It is safe to use even when it's not needed, but please avoid ++ * cluttering the source with useless code. ++ * ++ * If @errp is NULL or &error_fatal, rewrite it to point to a local ++ * Error variable, which will be automatically propagated to the ++ * original @errp on function exit. ++ * ++ * Note: &error_abort is not rewritten, because that would move the ++ * abort from the place where the error is created to the place where ++ * it's propagated. ++ */ ++#define ERRP_GUARD() \ ++ g_auto(ErrorPropagator) _auto_errp_prop = {.errp = errp}; \ ++ do { \ ++ if (!errp || errp == &error_fatal) { \ ++ errp = &_auto_errp_prop.local_err; \ ++ } \ ++ } while (0) ++ ++typedef struct ErrorPropagator { ++ Error *local_err; ++ Error **errp; ++} ErrorPropagator; ++ ++static inline void error_propagator_cleanup(ErrorPropagator *prop) ++{ ++ error_propagate(prop->errp, prop->local_err); ++} ++ ++G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(ErrorPropagator, error_propagator_cleanup); ++ + /* + * Special error destination to abort on error. + * See error_setg() and error_propagate() for details. 
+-- +2.27.0 + diff --git a/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch new file mode 100644 index 0000000..5d44708 --- /dev/null +++ b/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch @@ -0,0 +1,85 @@ +From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 13 Mar 2020 17:17:08 +0000 +Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200313171708.242774-1-dgilbert@redhat.com> +Patchwork-id: 94292 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip +Bugzilla: 1809380 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Paolo Bonzini + +From: "Dr. David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 +branch: rhel-av-8.2.0 +upstream: Posted and with review-by, not merged yet + +Commit 355477f8c73e9 skips rom reset when we're an incoming migration +so as not to overwrite shared ram in the ignore-shared migration +optimisation. +However, it's got an unexpected side effect that because it skips +freeing the ROM data, when rom_reset gets called later on, after +migration (e.g. during a reboot), the ROM does get reset to the original +file contents. Because of seabios/x86's weird reboot process +this confuses a reboot into hanging after a migration. + +Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") +https://bugzilla.redhat.com/show_bug.cgi?id=1809380 + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/loader.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 5099f27..375b29b 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) + { + Rom *rom; + +- /* +- * We don't need to fill in the RAM with ROM data because we'll fill +- * the data in during the next incoming migration in all cases. Note +- * that some of those RAMs can actually be modified by the guest on ARM +- * so this is probably the only right thing to do here. +- */ +- if (runstate_check(RUN_STATE_INMIGRATE)) +- return; +- + QTAILQ_FOREACH(rom, &roms, next) { + if (rom->fw_file) { + continue; + } ++ /* ++ * We don't need to fill in the RAM with ROM data because we'll fill ++ * the data in during the next incoming migration in all cases. Note ++ * that some of those RAMs can actually be modified by the guest. ++ */ ++ if (runstate_check(RUN_STATE_INMIGRATE)) { ++ if (rom->data && rom->isrom) { ++ /* ++ * Free it so that a rom_reset after migration doesn't ++ * overwrite a potentially modified 'rom'. 
++ */ ++ rom_free_data(rom); ++ } ++ continue; ++ } ++ + if (rom->data == NULL) { + continue; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch b/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch new file mode 100644 index 0000000..ea2edbd --- /dev/null +++ b/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch @@ -0,0 +1,131 @@ +From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:44 +0000 +Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> +Patchwork-id: 94225 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. + +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-4-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/file-posix.c | 67 ------------------------------------------------------ + 1 file changed, 67 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 1b805bd..fd29372 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); + } + +-static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int fd; +- int ret = 0; +- struct stat stat_buf; +- int64_t total_size = 0; +- bool has_prefix; +- +- /* This function is used by both protocol block drivers and therefore either +- * of these prefixes may be given. 
+- * The return value has to be stored somewhere, otherwise this is an error +- * due to -Werror=unused-value. */ +- has_prefix = +- strstart(filename, "host_device:", &filename) || +- strstart(filename, "host_cdrom:" , &filename); +- +- (void)has_prefix; +- +- ret = raw_normalize_devicepath(&filename, errp); +- if (ret < 0) { +- return ret; +- } +- +- /* Read out options */ +- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- +- fd = qemu_open(filename, O_WRONLY | O_BINARY); +- if (fd < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not open device"); +- return ret; +- } +- +- if (fstat(fd, &stat_buf) < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not stat device"); +- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { +- error_setg(errp, +- "The given file is neither a block nor a character device"); +- ret = -ENODEV; +- } else if (lseek(fd, 0, SEEK_END) < total_size) { +- error_setg(errp, "Device is too small"); +- ret = -ENOSPC; +- } +- +- if (!ret && total_size) { +- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; +- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); +- if (lseek(fd, 0, SEEK_SET) == -1) { +- ret = -errno; +- } else { +- ret = qemu_write_full(fd, buf, zero_size); +- ret = ret == zero_size ? 
0 : -errno; +- } +- } +- qemu_close(fd); +- return ret; +-} +- + static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .protocol_name = "host_device", +@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch new file mode 100644 index 0000000..efdf16b --- /dev/null +++ b/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch @@ -0,0 +1,48 @@ +From 55bfda3a0e077b822f57e8ed901f0cee848bc471 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:35 +0100 +Subject: [PATCH 07/17] file-posix: Support BDRV_REQ_ZERO_WRITE for truncate + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-7-kwolf@redhat.com> +Patchwork-id: 97452 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 06/11] file-posix: Support 
BDRV_REQ_ZERO_WRITE for truncate +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the +OS, so we can advertise the flag and just ignore it. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Max Reitz +Message-Id: <20200424125448.63318-7-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2f0c6e7a650de133eccd94e9bb6cf7b2070f07f1) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/file-posix.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 7551e8d..adafbfa 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -674,6 +674,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + #endif + + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ++ if (S_ISREG(st.st_mode)) { ++ /* When extending regular files, we get zeros from the OS */ ++ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; ++ } + ret = 0; + fail: + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch b/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch new file mode 100644 index 0000000..e34f576 --- /dev/null +++ b/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch @@ -0,0 +1,275 @@ +From a0816e4374759048cb24b9b3549a093a2ccb6240 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:50 +0100 +Subject: [PATCH 07/12] hmat acpi: Build Memory Proximity Domain Attributes + Structure(s) + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-7-plai@redhat.com> +Patchwork-id: 96734 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 06/11] hmat acpi: Build Memory Proximity Domain Attributes Structure(s) +Bugzilla: 1600217 
+RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Liu Jingqi + +HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table +(HMAT). The specification is available at the link below: +http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf + +It describes the memory attributes, such as memory side cache +attributes and bandwidth and latency details, related to the +Memory Proximity Domain. The software is +expected to use this information as a hint for optimization. + +This structure describes Memory Proximity Domain Attributes by memory +subsystem and its associativity with processor proximity domain as well as +hints for memory usage. + +In the Linux kernel, the code in drivers/acpi/hmat/hmat.c parses and reports +the platform's HMAT tables. + +Acked-by: Markus Armbruster +Reviewed-by: Igor Mammedov +Reviewed-by: Daniel Black +Reviewed-by: Jonathan Cameron +Signed-off-by: Liu Jingqi +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-5-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit e6f123c3b81241be33f1b763d0ff8b36d1ae9c1e) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L.
de Paula +--- + hw/acpi/Kconfig | 7 ++-- + hw/acpi/Makefile.objs | 1 + + hw/acpi/hmat.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/acpi/hmat.h | 42 ++++++++++++++++++++++ + hw/i386/acpi-build.c | 5 +++ + 5 files changed, 152 insertions(+), 2 deletions(-) + create mode 100644 hw/acpi/hmat.c + create mode 100644 hw/acpi/hmat.h + +diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig +index 12e3f1e..54209c6 100644 +--- a/hw/acpi/Kconfig ++++ b/hw/acpi/Kconfig +@@ -7,6 +7,7 @@ config ACPI_X86 + select ACPI_NVDIMM + select ACPI_CPU_HOTPLUG + select ACPI_MEMORY_HOTPLUG ++ select ACPI_HMAT + + config ACPI_X86_ICH + bool +@@ -23,6 +24,10 @@ config ACPI_NVDIMM + bool + depends on ACPI + ++config ACPI_HMAT ++ bool ++ depends on ACPI ++ + config ACPI_PCI + bool + depends on ACPI && PCI +@@ -33,5 +38,3 @@ config ACPI_VMGENID + depends on PC + + config ACPI_HW_REDUCED +- bool +- depends on ACPI +diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs +index 655a9c1..517bd88 100644 +--- a/hw/acpi/Makefile.objs ++++ b/hw/acpi/Makefile.objs +@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o + common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o + common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o + common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o ++common-obj-$(CONFIG_ACPI_HMAT) += hmat.o + common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o + + common-obj-y += acpi_interface.o +diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c +new file mode 100644 +index 0000000..9ff7930 +--- /dev/null ++++ b/hw/acpi/hmat.c +@@ -0,0 +1,99 @@ ++/* ++ * HMAT ACPI Implementation ++ * ++ * Copyright(C) 2019 Intel Corporation. 
++ * ++ * Author: ++ * Liu jingqi ++ * Tao Xu ++ * ++ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table ++ * (HMAT) ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, see ++ */ ++ ++#include "qemu/osdep.h" ++#include "sysemu/numa.h" ++#include "hw/acpi/hmat.h" ++ ++/* ++ * ACPI 6.3: ++ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145 ++ */ ++static void build_hmat_mpda(GArray *table_data, uint16_t flags, ++ uint32_t initiator, uint32_t mem_node) ++{ ++ ++ /* Memory Proximity Domain Attributes Structure */ ++ /* Type */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Length */ ++ build_append_int_noprefix(table_data, 40, 4); ++ /* Flags */ ++ build_append_int_noprefix(table_data, flags, 2); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Proximity Domain for the Attached Initiator */ ++ build_append_int_noprefix(table_data, initiator, 4); ++ /* Proximity Domain for the Memory */ ++ build_append_int_noprefix(table_data, mem_node, 4); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 4); ++ /* ++ * Reserved: ++ * Previously defined as the Start Address of the System Physical ++ * Address Range. Deprecated since ACPI Spec 6.3. 
++ */ ++ build_append_int_noprefix(table_data, 0, 8); ++ /* ++ * Reserved: ++ * Previously defined as the Range Length of the region in bytes. ++ * Deprecated since ACPI Spec 6.3. ++ */ ++ build_append_int_noprefix(table_data, 0, 8); ++} ++ ++/* Build HMAT sub table structures */ ++static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) ++{ ++ uint16_t flags; ++ int i; ++ ++ for (i = 0; i < numa_state->num_nodes; i++) { ++ flags = 0; ++ ++ if (numa_state->nodes[i].initiator < MAX_NODES) { ++ flags |= HMAT_PROXIMITY_INITIATOR_VALID; ++ } ++ ++ build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i); ++ } ++} ++ ++void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) ++{ ++ int hmat_start = table_data->len; ++ ++ /* reserve space for HMAT header */ ++ acpi_data_push(table_data, 40); ++ ++ hmat_build_table_structs(table_data, numa_state); ++ ++ build_header(linker, table_data, ++ (void *)(table_data->data + hmat_start), ++ "HMAT", table_data->len - hmat_start, 2, NULL, NULL); ++} +diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h +new file mode 100644 +index 0000000..437dbc6 +--- /dev/null ++++ b/hw/acpi/hmat.h +@@ -0,0 +1,42 @@ ++/* ++ * HMAT ACPI Implementation Header ++ * ++ * Copyright(C) 2019 Intel Corporation. ++ * ++ * Author: ++ * Liu jingqi ++ * Tao Xu ++ * ++ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table ++ * (HMAT) ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, see ++ */ ++ ++#ifndef HMAT_H ++#define HMAT_H ++ ++#include "hw/acpi/aml-build.h" ++ ++/* ++ * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure, ++ * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in ++ * the Proximity Domain for the Attached Initiator field is valid. ++ * Other bits reserved. ++ */ ++#define HMAT_PROXIMITY_INITIATOR_VALID 0x1 ++ ++void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state); ++ ++#endif +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 6400189..b1f8c55 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -67,6 +67,7 @@ + #include "hw/i386/intel_iommu.h" + + #include "hw/acpi/ipmi.h" ++#include "hw/acpi/hmat.h" + + /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and + * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows +@@ -2837,6 +2838,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + acpi_add_table(table_offsets, tables_blob); + build_slit(tables_blob, tables->linker, machine); + } ++ if (machine->numa_state->hmat_enabled) { ++ acpi_add_table(table_offsets, tables_blob); ++ build_hmat(tables_blob, tables->linker, machine->numa_state); ++ } + } + if (acpi_get_mcfg(&mcfg)) { + acpi_add_table(table_offsets, tables_blob); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch b/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch new file mode 100644 index 0000000..01ef4ce --- /dev/null +++ b/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch @@ -0,0 +1,137 @@ +From d00453667cb972dc2fe1242081d3b39313a6a925 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:52 +0100 +Subject: [PATCH 09/12] hmat acpi: Build Memory Side Cache Information + Structure(s) + +RH-Author: 
plai@redhat.com +Message-id: <20200521235655.27141-9-plai@redhat.com> +Patchwork-id: 96741 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 08/11] hmat acpi: Build Memory Side Cache Information Structure(s) +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Liu Jingqi + +This structure describes memory side cache information for memory +proximity domains if the memory side cache is present and the +physical device forms the memory side cache. +The software could use this information to effectively place +the data in memory to maximize the performance of the system +memory that uses the memory side cache. + +Acked-by: Markus Armbruster +Reviewed-by: Igor Mammedov +Reviewed-by: Daniel Black +Reviewed-by: Jonathan Cameron +Signed-off-by: Liu Jingqi +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-7-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a9c2b841af002db6e21e1297c9026b63fc22c875) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L.
de Paula +--- + hw/acpi/hmat.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 68 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c +index 4635d45..7c24bb5 100644 +--- a/hw/acpi/hmat.c ++++ b/hw/acpi/hmat.c +@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb, + g_free(entry_list); + } + ++/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */ ++static void build_hmat_cache(GArray *table_data, uint8_t total_levels, ++ NumaHmatCacheOptions *hmat_cache) ++{ ++ /* ++ * Cache Attributes: Bits [3:0] – Total Cache Levels ++ * for this Memory Proximity Domain ++ */ ++ uint32_t cache_attr = total_levels; ++ ++ /* Bits [7:4] : Cache Level described in this structure */ ++ cache_attr |= (uint32_t) hmat_cache->level << 4; ++ ++ /* Bits [11:8] - Cache Associativity */ ++ cache_attr |= (uint32_t) hmat_cache->associativity << 8; ++ ++ /* Bits [15:12] - Write Policy */ ++ cache_attr |= (uint32_t) hmat_cache->policy << 12; ++ ++ /* Bits [31:16] - Cache Line size in bytes */ ++ cache_attr |= (uint32_t) hmat_cache->line << 16; ++ ++ /* Type */ ++ build_append_int_noprefix(table_data, 2, 2); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Length */ ++ build_append_int_noprefix(table_data, 32, 4); ++ /* Proximity Domain for the Memory */ ++ build_append_int_noprefix(table_data, hmat_cache->node_id, 4); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 4); ++ /* Memory Side Cache Size */ ++ build_append_int_noprefix(table_data, hmat_cache->size, 8); ++ /* Cache Attributes */ ++ build_append_int_noprefix(table_data, cache_attr, 4); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* ++ * Number of SMBIOS handles (n) ++ * Linux kernel uses Memory Side Cache Information Structure ++ * without SMBIOS entries for now, so set Number of SMBIOS handles ++ * as 0. 
++ */ ++ build_append_int_noprefix(table_data, 0, 2); ++} ++ + /* Build HMAT sub table structures */ + static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) + { + uint16_t flags; + uint32_t num_initiator = 0; + uint32_t initiator_list[MAX_NODES]; +- int i, hierarchy, type; ++ int i, hierarchy, type, cache_level, total_levels; + HMAT_LB_Info *hmat_lb; ++ NumaHmatCacheOptions *hmat_cache; + + for (i = 0; i < numa_state->num_nodes; i++) { + flags = 0; +@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) + } + } + } ++ ++ /* ++ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: ++ * Table 5-147 ++ */ ++ for (i = 0; i < numa_state->num_nodes; i++) { ++ total_levels = 0; ++ for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) { ++ if (numa_state->hmat_cache[i][cache_level]) { ++ total_levels++; ++ } ++ } ++ for (cache_level = 0; cache_level <= total_levels; cache_level++) { ++ hmat_cache = numa_state->hmat_cache[i][cache_level]; ++ if (hmat_cache) { ++ build_hmat_cache(table_data, total_levels, hmat_cache); ++ } ++ } ++ } + } + + void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch b/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch new file mode 100644 index 0000000..a7120d7 --- /dev/null +++ b/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch @@ -0,0 +1,173 @@ +From f55b8b251c323856087baf2380d93fbf2da15db7 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:51 +0100 +Subject: [PATCH 08/12] hmat acpi: Build System Locality Latency and Bandwidth + Information Structure(s) + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-8-plai@redhat.com> +Patchwork-id: 96733 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 07/11] hmat acpi: Build System Locality Latency and Bandwidth 
Information Structure(s) +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Liu Jingqi + +This structure describes the memory access latency and bandwidth +information from various memory access initiator proximity domains. +The latency and bandwidth numbers represented in this structure +correspond to rated latency and bandwidth for the platform. +The software could use this information as hint for optimization. + +Acked-by: Markus Armbruster +Reviewed-by: Igor Mammedov +Signed-off-by: Liu Jingqi +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-6-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 4586a2cb833f80b19c80ebe364a005ac2fa0974a) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + hw/acpi/hmat.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 103 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c +index 9ff7930..4635d45 100644 +--- a/hw/acpi/hmat.c ++++ b/hw/acpi/hmat.c +@@ -25,6 +25,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "sysemu/numa.h" + #include "hw/acpi/hmat.h" + +@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, + build_append_int_noprefix(table_data, 0, 8); + } + ++/* ++ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information ++ * Structure: Table 5-146 ++ */ ++static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb, ++ uint32_t num_initiator, uint32_t num_target, ++ uint32_t *initiator_list) ++{ ++ int i, index; ++ HMAT_LB_Data *lb_data; ++ uint16_t *entry_list; ++ uint32_t base; ++ /* Length in bytes for entire structure */ ++ uint32_t lb_length ++ = 32 /* Table length upto and including Entry Base Unit */ ++ + 4 * num_initiator /* Initiator Proximity Domain List */ ++ + 4 * num_target /* Target Proximity Domain List */ ++ + 2 * 
num_initiator * num_target; /* Latency or Bandwidth Entries */ ++ ++ /* Type */ ++ build_append_int_noprefix(table_data, 1, 2); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Length */ ++ build_append_int_noprefix(table_data, lb_length, 4); ++ /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */ ++ assert(!(hmat_lb->hierarchy >> 4)); ++ build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1); ++ /* Data Type */ ++ build_append_int_noprefix(table_data, hmat_lb->data_type, 1); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 2); ++ /* Number of Initiator Proximity Domains (s) */ ++ build_append_int_noprefix(table_data, num_initiator, 4); ++ /* Number of Target Proximity Domains (t) */ ++ build_append_int_noprefix(table_data, num_target, 4); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 4); ++ ++ /* Entry Base Unit */ ++ if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) { ++ /* Convert latency base from nanoseconds to picosecond */ ++ base = hmat_lb->base * 1000; ++ } else { ++ /* Convert bandwidth base from Byte to Megabyte */ ++ base = hmat_lb->base / MiB; ++ } ++ build_append_int_noprefix(table_data, base, 8); ++ ++ /* Initiator Proximity Domain List */ ++ for (i = 0; i < num_initiator; i++) { ++ build_append_int_noprefix(table_data, initiator_list[i], 4); ++ } ++ ++ /* Target Proximity Domain List */ ++ for (i = 0; i < num_target; i++) { ++ build_append_int_noprefix(table_data, i, 4); ++ } ++ ++ /* Latency or Bandwidth Entries */ ++ entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t)); ++ for (i = 0; i < hmat_lb->list->len; i++) { ++ lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); ++ index = lb_data->initiator * num_target + lb_data->target; ++ ++ entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base); ++ } ++ ++ for (i = 0; i < num_initiator * num_target; i++) { ++ build_append_int_noprefix(table_data, entry_list[i], 2); ++ } ++ ++ g_free(entry_list); ++} ++ + 
/* Build HMAT sub table structures */ + static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) + { + uint16_t flags; +- int i; ++ uint32_t num_initiator = 0; ++ uint32_t initiator_list[MAX_NODES]; ++ int i, hierarchy, type; ++ HMAT_LB_Info *hmat_lb; + + for (i = 0; i < numa_state->num_nodes; i++) { + flags = 0; +@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) + + build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i); + } ++ ++ for (i = 0; i < numa_state->num_nodes; i++) { ++ if (numa_state->nodes[i].has_cpu) { ++ initiator_list[num_initiator++] = i; ++ } ++ } ++ ++ /* ++ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information ++ * Structure: Table 5-146 ++ */ ++ for (hierarchy = HMAT_LB_MEM_MEMORY; ++ hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) { ++ for (type = HMAT_LB_DATA_ACCESS_LATENCY; ++ type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) { ++ hmat_lb = numa_state->hmat_lb[hierarchy][type]; ++ ++ if (hmat_lb && hmat_lb->list->len) { ++ build_hmat_lb(table_data, hmat_lb, num_initiator, ++ numa_state->num_nodes, initiator_list); ++ } ++ } ++ } + } + + void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch new file mode 100644 index 0000000..f01dec2 --- /dev/null +++ b/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch @@ -0,0 +1,100 @@ +From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:02 +0000 +Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-5-kwolf@redhat.com> +Patchwork-id: 93750 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez 
Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +In order to issue requests on an existing BlockBackend with the +'qemu-io' HMP command, allow specifying the BlockBackend not only with a +BlockBackend name, but also with a qdev ID/QOM path for a device that +owns the (possibly anonymous) BlockBackend. + +Because qdev names could be conflicting with BlockBackend and node +names, introduce a -d option to explicitly address a device. If the +option is not given, a BlockBackend or a node is addressed. + +Signed-off-by: Kevin Wolf +(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + hmp-commands.hx | 8 +++++--- + monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- + 2 files changed, 23 insertions(+), 13 deletions(-) + +diff --git a/hmp-commands.hx b/hmp-commands.hx +index cfcc044..dc23185 100644 +--- a/hmp-commands.hx ++++ b/hmp-commands.hx +@@ -1875,9 +1875,11 @@ ETEXI + + { + .name = "qemu-io", +- .args_type = "device:B,command:s", +- .params = "[device] \"[command]\"", +- .help = "run a qemu-io command on a block device", ++ .args_type = "qdev:-d,device:B,command:s", ++ .params = "[-d] [device] \"[command]\"", ++ .help = "run a qemu-io command on a block device\n\t\t\t" ++ "-d: [device] is a device ID rather than a " ++ "drive ID or node name", + .cmd = hmp_qemu_io, + }, + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index b2551c1..5f8941d 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + { + BlockBackend *blk; + BlockBackend *local_blk = NULL; ++ bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char* device = qdict_get_str(qdict, "device"); + const char* command = qdict_get_str(qdict, "command"); + Error *err = NULL; + int ret; + +- blk = blk_by_name(device); +- if (!blk) { +- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); +- if (bs) { +- blk 
= local_blk = blk_new(bdrv_get_aio_context(bs), +- 0, BLK_PERM_ALL); +- ret = blk_insert_bs(blk, bs, &err); +- if (ret < 0) { ++ if (qdev) { ++ blk = blk_by_qdev_id(device, &err); ++ if (!blk) { ++ goto fail; ++ } ++ } else { ++ blk = blk_by_name(device); ++ if (!blk) { ++ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); ++ if (bs) { ++ blk = local_blk = blk_new(bdrv_get_aio_context(bs), ++ 0, BLK_PERM_ALL); ++ ret = blk_insert_bs(blk, bs, &err); ++ if (ret < 0) { ++ goto fail; ++ } ++ } else { + goto fail; + } +- } else { +- goto fail; + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch b/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch new file mode 100644 index 0000000..2f4f6dd --- /dev/null +++ b/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch @@ -0,0 +1,77 @@ +From fe8a9f211fba3588d60507b3d2f48c41d8ee3c79 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Mon, 4 May 2020 21:25:04 +0100 +Subject: [PATCH 1/9] hw/pci/pcie: Forbid hot-plug if it's disabled on the slot + +RH-Author: Julia Suvorova +Message-id: <20200504212505.15977-2-jusual@redhat.com> +Patchwork-id: 96257 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] hw/pci/pcie: Forbid hot-plug if it's disabled on the slot +Bugzilla: 1820531 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Peter Xu + +Raise an error when trying to hot-plug/unplug a device through QMP to a device +with disabled hot-plug capability. This makes the device behaviour more +consistent and provides an explanation of the failure in the case of +asynchronous unplug. + +Signed-off-by: Julia Suvorova +Message-Id: <20200427182440.92433-2-jusual@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Marcel Apfelbaum +(cherry picked from commit 0501e1aa1d32a6e02dd06a79bba97fbe9d557cb5) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/pci/pcie.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 0eb3a2a..6b48d04 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -415,6 +415,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + { + PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; ++ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + PCIDevice *pci_dev = PCI_DEVICE(dev); + + /* Don't send event when device is enabled during qemu machine creation: +@@ -430,6 +431,13 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + ++ /* Check if hot-plug is disabled on the slot */ ++ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { ++ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", ++ DEVICE(hotplug_pdev)->id); ++ return; ++ } ++ + /* To enable multifunction hot-plug, we just ensure the function + * 0 added last. When function 0 is added, we set the sltsta and + * inform OS via event notification. 
+@@ -470,6 +478,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + Error *local_err = NULL; + PCIDevice *pci_dev = PCI_DEVICE(dev); + PCIBus *bus = pci_get_bus(pci_dev); ++ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); ++ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; ++ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); ++ ++ /* Check if hot-unplug is disabled on the slot */ ++ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { ++ error_setg(errp, "Hot-unplug failed: " ++ "unsupported by the port device '%s'", ++ DEVICE(hotplug_pdev)->id); ++ return; ++ } + + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &local_err); + if (local_err) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch b/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch new file mode 100644 index 0000000..0c44c77 --- /dev/null +++ b/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch @@ -0,0 +1,90 @@ +From 035f8aaabf2c31cd6206bff6da23a12fee69d1b7 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Tue, 16 Jun 2020 12:25:36 -0400 +Subject: [PATCH 1/3] hw/pci/pcie: Move hot plug capability check to pre_plug + callback + +RH-Author: Julia Suvorova +Message-id: <20200616122536.1027685-1-jusual@redhat.com> +Patchwork-id: 97548 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] hw/pci/pcie: Move hot plug capability check to pre_plug callback +Bugzilla: 1820531 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Auger Eric +RH-Acked-by: Sergio Lopez Pascual + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1820531 +BRANCH: rhel-av-8.2.1 +UPSTREAM: merged +BREW: 29422092 + +Check for hot plug capability earlier to avoid removing devices attached +during the initialization process. 
+ +Run qemu with an unattached drive: + -drive file=$FILE,if=none,id=drive0 \ + -device pcie-root-port,id=rp0,slot=3,bus=pcie.0,hotplug=off +Hotplug a block device: + device_add virtio-blk-pci,id=blk0,drive=drive0,bus=rp0 +If hotplug fails on plug_cb, drive0 will be deleted. + +Fixes: 0501e1aa1d32a6 ("hw/pci/pcie: Forbid hot-plug if it's disabled on the slot") + +Acked-by: Igor Mammedov +Signed-off-by: Julia Suvorova +Message-Id: <20200604125947.881210-1-jusual@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 0dabc0f6544f2c0310546f6d6cf3b68979580a9c) +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/pci/pcie.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index abc99b6eff..1386dd228c 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -407,6 +407,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, + void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); ++ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; ++ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); ++ ++ /* Check if hot-plug is disabled on the slot */ ++ if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { ++ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", ++ DEVICE(hotplug_pdev)->id); ++ return; ++ } ++ + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); + } + +@@ -415,7 +426,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + { + PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; +- uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + PCIDevice *pci_dev = PCI_DEVICE(dev); + + /* Don't send event when device is enabled during qemu machine creation: +@@ -431,13 +441,6 
@@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- /* Check if hot-plug is disabled on the slot */ +- if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { +- error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", +- DEVICE(hotplug_pdev)->id); +- return; +- } +- + /* To enable multifunction hot-plug, we just ensure the function + * 0 added last. When function 0 is added, we set the sltsta and + * inform OS via event notification. +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch b/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch new file mode 100644 index 0000000..51a587f --- /dev/null +++ b/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch @@ -0,0 +1,62 @@ +From f98a1fdad0aa53337925ac46b73a3e6ad36f6295 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Mon, 4 May 2020 21:25:05 +0100 +Subject: [PATCH 2/9] hw/pci/pcie: Replace PCI_DEVICE() casts with existing + variable + +RH-Author: Julia Suvorova +Message-id: <20200504212505.15977-3-jusual@redhat.com> +Patchwork-id: 96259 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] hw/pci/pcie: Replace PCI_DEVICE() casts with existing variable +Bugzilla: 1820531 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Peter Xu + +A little cleanup is possible because of hotplug_pdev introduction. + +Signed-off-by: Julia Suvorova +Message-Id: <20200427182440.92433-3-jusual@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Marcel Apfelbaum +(cherry picked from commit 6a1e073378353eb6ac0565e0dc649b3db76ed5dc) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/pci/pcie.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 6b48d04..abc99b6 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -449,7 +449,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } +- pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), ++ pcie_cap_slot_event(hotplug_pdev, + PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + } + } +@@ -490,7 +490,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + return; + } + +- pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &local_err); ++ pcie_cap_slot_plug_common(hotplug_pdev, dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; +@@ -509,7 +509,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + return; + } + +- pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); ++ pcie_cap_slot_push_attention_button(hotplug_pdev); + } + + /* pci express slot for pci express root/downstream port +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch new file mode 100644 index 0000000..0f0f126 --- /dev/null +++ b/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch @@ -0,0 +1,262 @@ +From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 +From: "Daniel P. Berrange" +Date: Mon, 20 Jan 2020 16:13:08 +0000 +Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows + driver support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrange +Message-id: <20200120161308.584989-2-berrange@redhat.com> +Patchwork-id: 93422 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support +Bugzilla: 1782529 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +For Windows driver support, we have to follow this doc in order to +enable Windows to automatically determine the right drivers to install +for a given guest / host combination: + + https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer + +Out of the choices available, it was decided that the Windows drivers +will be written to expect use of the scheme documented as "HardwareID-6" +against Windows 10. This uses SMBIOS System (Type 1) and Base Board +(Type 2) tables and will match on + + System Manufacturer = Red Hat + System SKU Number = 8.2.0 + Baseboard Manufacturer = Red Hat + Baseboard Product = RHEL-AV + +The new SMBIOS fields will be tied to machine type and only reported for +pc-q35-8.2.0 machine and later. + +The old SMBIOS fields, previously reported by all machines were: + + System Manufacturer: Red Hat + System Product Name: KVM + System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + System Family: Red Hat Enterprise Linux + Baseboard Manufacturer: Red Hat + Baseboard Product Name: KVM + Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Chassis Manufacturer: Red Hat + Chassis Product Name: KVM + Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Processor Manufacturer: Red Hat + Processor Product Name: KVM + Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + +This information will continue to be reported for all machines, except +where it conflicts with the requirement of the new SMBIOS data. IOW, +the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 +machine types and later. 
+ +Management applications MUST NEVER override the 4 new SMBIOS fields that +are used for Windows driver matching, with differing values. Aside from +this, they are free to override any other field, including those from +the old SMBIOS field data. + +In particular if a management application wants to report its own +product name and version, it is recommended to use "System product" +and "System version" as identifying fields, as these avoid a clash with +the new SMBIOS fields used for Windows drivers. + +Note that until now the Baseboard (type 2) table has only been generated +by QEMU if explicitly asked for on the CLI. This patch makes it always +present for new machine types. + +Signed-off-by: Daniel P. Berrangé +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 2 +- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 8 ++++++++ + hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- + include/hw/firmware/smbios.h | 5 ++++- + include/hw/i386/pc.h | 3 +++ + 6 files changed, 60 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d30d38c..2dcf6e7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bd7fdb9..2ac94d5 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 7531d8e..e975643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) + + static void pc_q35_machine_rhel820_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; + } + + DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, +@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) + + static void pc_q35_machine_rhel810_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); 
+ } +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index e6e9355..d65c149 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. 
++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 02a0ced..67e38a1 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char 
*manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 2e362c8..b9f29ba 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -109,6 +109,9 @@ typedef struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch b/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch new file mode 100644 index 0000000..b2cc438 --- /dev/null +++ b/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch @@ -0,0 +1,199 @@ +From 1bee5a77b3f999d2933a440021737d0720b32269 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 29 Jul 2020 18:56:21 -0400 +Subject: [PATCH 1/4] i386: Add 2nd Generation AMD EPYC processors + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200729185621.152427-2-dgilbert@redhat.com> +Patchwork-id: 98078 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] i386: Add 2nd Generation AMD EPYC processors +Bugzilla: 1780385 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Maxim Levitsky + +From: "Moger, Babu" + +Adds the support for 2nd Gen AMD EPYC Processors. The model display +name will be EPYC-Rome. + +Adds the following new feature bits on top of the feature bits from the +first generation EPYC models. +perfctr-core : core performance counter extensions support. 
Enables the VM to + use extended performance counter support. It enables six + programmable counters instead of four counters. +clzero : instruction zeroes out the 64 byte cache line specified in RAX. +xsaveerptr : XSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES always save error + pointers and FXRSTOR, XRSTOR, XRSTORS always restore error + pointers. +wbnoinvd : Write back and do not invalidate cache +ibpb : Indirect Branch Prediction Barrier +amd-stibp : Single Thread Indirect Branch Predictor +clwb : Cache Line Write Back and Retain +xsaves : XSAVES, XRSTORS and IA32_XSS support +rdpid : Read Processor ID instruction support +umip : User-Mode Instruction Prevention support + +The Reference documents are available at +https://developer.amd.com/wp-content/resources/55803_0.54-PUB.pdf +https://www.amd.com/system/files/TechDocs/24594.pdf + +Depends on following kernel commits: +40bc47b08b6e ("kvm: x86: Enumerate support for CLZERO instruction") +504ce1954fba ("KVM: x86: Expose XSAVEERPTR to the guest") +6d61e3c32248 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") +52297436199d ("kvm: svm: Update svm_xsaves_supported") + +Signed-off-by: Babu Moger +Message-Id: <157314966312.23828.17684821666338093910.stgit@naples-babu.amd.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 143c30d4d346831a09e59e9af45afdca0331e819) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 102 +++++++++++++++++++++++++++++++++++++++++++++- + target/i386/cpu.h | 2 + + 2 files changed, 103 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a343de0c9d..ff39fc9905 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1133,7 +1133,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "clzero", NULL, "xsaveerptr", NULL, + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, +- "ibpb", NULL, NULL, NULL, ++ "ibpb", NULL, NULL, "amd-stibp", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, +@@ -1803,6 +1803,56 @@ static CPUCaches epyc_cache_info = { + }, + }; + ++static CPUCaches epyc_rome_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 16 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 16384, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = true, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4024,6 +4074,56 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Hygon Dhyana Processor", + .cache_info = 
&epyc_cache_info, + }, ++ { ++ .name = "EPYC-Rome", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 23, ++ .model = 49, ++ .stepping = 0, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | ++ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_STIBP, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | ++ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | ++ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, ++ .xlevel = 
0x8000001E, ++ .model_id = "AMD EPYC-Rome Processor", ++ .cache_info = &epyc_rome_cache_info, ++ }, + }; + + /* KVM-specific features that are automatically added/removed +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 7bfbf2a5e5..f3da25cb8a 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -792,6 +792,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) + /* Indirect Branch Prediction Barrier */ + #define CPUID_8000_0008_EBX_IBPB (1U << 12) ++/* Single Thread Indirect Branch Predictors */ ++#define CPUID_8000_0008_EBX_STIBP (1U << 15) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.27.0 + diff --git a/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch b/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch new file mode 100644 index 0000000..823ff0c --- /dev/null +++ b/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch @@ -0,0 +1,46 @@ +From cdafcc1d68110ed172c09c9e6bba42ee15b5a6df Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 15 May 2020 18:02:40 +0100 +Subject: [PATCH 13/17] i386: Add MSR feature bit for MDS-NO + +RH-Author: plai@redhat.com +Message-id: <20200515180243.17488-2-plai@redhat.com> +Patchwork-id: 96609 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 1/4] i386: Add MSR feature bit for MDS-NO +Bugzilla: 1769912 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +From: Cathy Zhang + +Define MSR_ARCH_CAP_MDS_NO in the IA32_ARCH_CAPABILITIES MSR to allow +CPU models to report the feature when host supports it. + +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-2-git-send-email-cathy.zhang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 77b168d221191156c47fcd8d1c47329dfdb9439e) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 4441061..60304cc 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -839,6 +839,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define MSR_ARCH_CAP_RSBA (1U << 2) + #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) + #define MSR_ARCH_CAP_SSB_NO (1U << 4) ++#define MSR_ARCH_CAP_MDS_NO (1U << 5) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-macro-for-stibp.patch b/SOURCES/kvm-i386-Add-macro-for-stibp.patch new file mode 100644 index 0000000..17dd149 --- /dev/null +++ b/SOURCES/kvm-i386-Add-macro-for-stibp.patch @@ -0,0 +1,49 @@ +From 00f916987589f114f42ce20b138c00c47b9e4df7 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 15 May 2020 18:02:41 +0100 +Subject: [PATCH 14/17] i386: Add macro for stibp + +RH-Author: plai@redhat.com +Message-id: <20200515180243.17488-3-plai@redhat.com> +Patchwork-id: 96610 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 2/4] i386: Add macro for stibp +Bugzilla: 1769912 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Cathy Zhang + +stibp feature is already added through the following commit. +https://github.com/qemu/qemu/commit/0e8916582991b9fd0b94850a8444b8b80d0a0955 + +Add a macro for it to allow CPU models to report it when host supports. + +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-3-git-send-email-cathy.zhang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 5af514d0cb314f43bc53f2aefb437f6451d64d0c) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 60304cc..e77d101 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -772,6 +772,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) ++/* Single Thread Indirect Branch Predictors */ ++#define CPUID_7_0_EDX_STIBP (1U << 27) + /* Arch Capabilities */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) + /* Core Capability */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch b/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch new file mode 100644 index 0000000..289d1e3 --- /dev/null +++ b/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch @@ -0,0 +1,108 @@ +From cf62577aed781b2515ea97b9f42285c2f608a7bf Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 15 May 2020 18:02:42 +0100 +Subject: [PATCH 16/17] i386: Add new CPU model Cooperlake + +RH-Author: plai@redhat.com +Message-id: <20200515180243.17488-4-plai@redhat.com> +Patchwork-id: 96608 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 3/4] i386: Add new CPU model Cooperlake +Bugzilla: 1769912 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Cathy Zhang + +Cooper Lake is intel's successor to Cascade Lake, the new +CPU model inherits features from Cascadelake-Server, while +add one platform associated new feature: AVX512_BF16. Meanwhile, +add STIBP for speculative execution. + +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-4-git-send-email-cathy.zhang@intel.com> +Reviewed-by: Bruce Rogers +Signed-off-by: Eduardo Habkost +(cherry picked from commit 22a866b6166db5caa4abaa6e656c2a431fa60726) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 60 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0f0a2db..996a74f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3161,6 +3161,66 @@ static X86CPUDefinition builtin_x86_defs[] = { + } + }, + { ++ .name = "Cooperlake", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 85, ++ .stepping = 10, ++ .features[FEAT_1_EDX] = ++ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | ++ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | ++ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | ++ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | ++ CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | ++ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | ++ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | ++ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | ++ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | ++ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | ++ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | ++ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | ++ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | ++ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_PKU | ++ CPUID_7_0_ECX_AVX512VNNI, ++ .features[FEAT_7_0_EDX] = ++ 
CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX512_BF16, ++ /* ++ * Missing: XSAVES (not supported by some Linux versions, ++ * including v4.1 to v4.12). ++ * KVM doesn't yet expose any XSAVES state save component, ++ * and the only one defined in Skylake (processor tracing) ++ * probably will block migration anyway. ++ */ ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (Cooperlake)", ++ }, ++ { + .name = "Icelake-Client", + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch b/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch new file mode 100644 index 0000000..17251bf --- /dev/null +++ b/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch @@ -0,0 +1,82 @@ +From d3b9c1891a6d05308dd5ea119d2c32c8f98a25da Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 30 Jun 2020 23:40:15 -0400 +Subject: [PATCH 1/4] i386: Mask SVM features if nested SVM is disabled + +RH-Author: Eduardo Habkost +Message-id: <20200630234015.166253-2-ehabkost@redhat.com> +Patchwork-id: 97852 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] i386: Mask SVM features if nested SVM is disabled +Bugzilla: 1835390 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Bandan Das +RH-Acked-by: Dr. David Alan Gilbert + +QEMU incorrectly validates FEAT_SVM feature flags against +GET_SUPPORTED_CPUID even if SVM features are being masked out by +cpu_x86_cpuid(). This can make QEMU print warnings on most AMD +CPU models, even when SVM nesting is disabled (which is the +default). 
+ +This bug was never detected before because of a Linux KVM bug: +until Linux v5.6, KVM was not filtering out SVM features in +GET_SUPPORTED_CPUID when nested was disabled. This KVM bug was +fixed in Linux v5.7-rc1, on Linux commit a50718cc3f43 ("KVM: +nSVM: Expose SVM features to L1 iff nested is enabled"). + +Fix the problem by adding a CPUID_EXT3_SVM dependency to all +FEAT_SVM feature flags in the feature_dependencies table. + +Reported-by: Yanan Fu +Signed-off-by: Eduardo Habkost +Message-Id: <20200623230116.277409-1-ehabkost@redhat.com> +[Fix testcase. - Paolo] +Signed-off-by: Paolo Bonzini +(cherry picked from commit 730319aef0fcb94f11a4a2d32656437fdde7efdd) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 4 ++++ + tests/test-x86-cpuid-compat.c | 4 ++-- + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 7d7b016bb7..a343de0c9d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1477,6 +1477,10 @@ static FeatureDep feature_dependencies[] = { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, + .to = { FEAT_VMX_VMFUNC, ~0ull }, + }, ++ { ++ .from = { FEAT_8000_0001_ECX, CPUID_EXT3_SVM }, ++ .to = { FEAT_SVM, ~0ull }, ++ }, + }; + + typedef struct X86RegisterInfo32 { +diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c +index e7c075ed98..983aa0719a 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -256,7 +256,7 @@ int main(int argc, char **argv) + "-cpu 486,+invtsc", "xlevel", 0x80000007); + /* CPUID[8000_000A].EDX: */ + add_cpuid_test("x86/cpuid/auto-xlevel/486/npt", +- "-cpu 486,+npt", "xlevel", 0x8000000A); ++ "-cpu 486,+svm,+npt", "xlevel", 0x8000000A); + /* CPUID[C000_0001].EDX: */ + add_cpuid_test("x86/cpuid/auto-xlevel2/phenom/xstore", + "-cpu phenom,+xstore", "xlevel2", 0xC0000001); +@@ -349,7 +349,7 @@ int main(int argc, char **argv) + "-machine 
pc-i440fx-2.4 -cpu SandyBridge,", + "xlevel", 0x80000008); + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", +- "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", ++ "-machine pc-i440fx-2.4 -cpu SandyBridge,+svm,+npt", + "xlevel", 0x80000008); + #endif + +-- +2.27.0 + diff --git a/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch new file mode 100644 index 0000000..5d62ace --- /dev/null +++ b/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch @@ -0,0 +1,77 @@ +From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Mon, 23 Dec 2019 21:10:31 +0000 +Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model + +RH-Author: Eduardo Habkost +Message-id: <20191223211031.26503-1-ehabkost@redhat.com> +Patchwork-id: 93213 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model +Bugzilla: 1741345 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 +BRANCH: rhel-av-8.2.0 +Upstream: not applicable +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 + +We don't provide rhel6 machine types anymore, so we don't need to +provide compatibility with RHEL6. cpu64-rhel6 was documented as +deprecated and scheduled for removal in 8.2, so now it's time to +remove it. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 26 +------------------------- + 1 file changed, 1 insertion(+), 25 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 790db77..6dce6f2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { +- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. +- * The default on RHEL-6 was cpu64-rhel6. 
+- * libvirt assumes that qemu64 is the default for _all_ machine-types, +- * so we should try to keep qemu64 and cpu64-rhel6 as similar as +- * possible. +- */ ++ /* qemu64 is the default CPU model for all machine-types */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, +@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, + { +- .name = "cpu64-rhel6", +- .level = 4, +- .vendor = CPUID_VENDOR_AMD, +- .family = 6, +- .model = 13, +- .stepping = 3, +- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | +- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | +- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | +- CPUID_PSE | CPUID_DE | CPUID_FP87, +- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, +- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, +- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | +- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +- .xlevel = 0x8000000A, +- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", +- }, +- { + .name = "Conroe", + .level = 10, + .vendor = CPUID_VENDOR_INTEL, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch b/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch new file mode 100644 index 0000000..1027341 --- /dev/null +++ b/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch @@ -0,0 +1,95 @@ +From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Thu, 5 Dec 2019 21:56:50 +0000 +Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default + +RH-Author: Eduardo Habkost +Message-id: <20191205225650.772600-2-ehabkost@redhat.com> +Patchwork-id: 92907 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default +Bugzilla: 1787291 1779078 1779078 +RH-Acked-by: 
Danilo de Paula +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 +Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> + +When using `query-cpu-definitions` using `-machine none`, +QEMU is resolving all CPU models to their latest versions. The +actual CPU model version being used by another machine type (e.g. +`pc-q35-4.0`) might be different. + +In theory, this was OK because the correct CPU model +version is returned when using the correct `-machine` argument. + +Except that in practice, this breaks libvirt expectations: +libvirt always uses `-machine none` when checking if a CPU model +is runnable, because runnability is not expected to be affected +when the machine type is changed. + +For example, when running on a Haswell host without TSX, +Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, +`query-cpu-definitions` says Haswell is runnable if using +`-machine none`, but Haswell is actually not runnable using any +of the `pc-*` machine types (because they resolve Haswell to +Haswell-v1). In other words, we're breaking the "runnability +guarantee" we promised to not break for a few releases (see +qemu-deprecated.texi). + +To address this issue, change the default CPU model version to v1 +on all machine types, so we make `query-cpu-definitions` output +when using `-machine none` match the results when using `pc-*`. +This will change in the future (the plan is to always return the +latest CPU model version if using `-machine none`), but only +after giving libvirt the opportunity to adapt. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-deprecated.texi | 7 +++++++ + target/i386/cpu.c | 8 +++++++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi +index 4b4b742..534ebe9 100644 +--- a/qemu-deprecated.texi ++++ b/qemu-deprecated.texi +@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te + ``alias-of'' field returned by the ``query-cpu-definitions'' QMP + command. + ++While those guarantees are kept, the return value of ++``query-cpu-definitions'' will have existing CPU model aliases ++point to a version that doesn't break runnability guarantees ++(specifically, version 1 of those CPU models). In future QEMU ++versions, aliases will point to newer CPU model versions ++depending on the machine type, so management software must ++resolve CPU model aliases before starting a virtual machine. + + @node Recently removed features + @appendix Recently removed features +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6dce6f2..863192c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { + }; + + +-X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; ++/* ++ * We resolve CPU model aliases using -v1 when using "-machine ++ * none", but this is just for compatibility while libvirt isn't ++ * adapted to resolve CPU model versions before creating VMs. ++ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. 
++ */ ++X86CPUVersion default_cpu_version = 1; + + void x86_cpu_set_default_version(X86CPUVersion version) + { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch b/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch new file mode 100644 index 0000000..b171749 --- /dev/null +++ b/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch @@ -0,0 +1,281 @@ +From 3899672db472c1ca530badd49d17726a1057f8af Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Jun 2020 07:41:10 -0400 +Subject: [PATCH 40/42] kvm: introduce kvm_kernel_irqchip_* functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200605074111.2185-3-thuth@redhat.com> +Patchwork-id: 97369 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/3] kvm: introduce kvm_kernel_irqchip_* functions +Bugzilla: 1756946 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Paolo Bonzini + +The KVMState struct is opaque, so provide accessors for the fields +that will be moved from current_machine to the accelerator. For now +they just forward to the machine object, but this will change. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4376c40dedb22530738eeb104a603e94ed03f719) + +Conflicts: + accel/kvm/kvm-all.c + (contextual conflict due to missing other commits in downstream) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + accel/kvm/kvm-all.c | 23 +++++++++++++++++++---- + hw/ppc/e500.c | 5 ++--- + hw/ppc/spapr_irq.c | 16 ++++------------ + include/sysemu/kvm.h | 7 +++++-- + target/arm/kvm.c | 8 ++++---- + target/i386/kvm.c | 4 ++-- + target/mips/kvm.c | 2 +- + target/ppc/kvm.c | 2 +- + target/s390x/kvm.c | 2 +- + 9 files changed, 39 insertions(+), 30 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5007bdad96..b0250209f5 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1772,7 +1772,7 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) + g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); + } + +-static void kvm_irqchip_create(MachineState *machine, KVMState *s) ++static void kvm_irqchip_create(KVMState *s) + { + int ret; + +@@ -1790,9 +1790,9 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) + + /* First probe and see if there's a arch-specific hook to create the + * in-kernel irqchip for us */ +- ret = kvm_arch_irqchip_create(machine, s); ++ ret = kvm_arch_irqchip_create(s); + if (ret == 0) { +- if (machine_kernel_irqchip_split(machine)) { ++ if (kvm_kernel_irqchip_split()) { + perror("Split IRQ chip mode not supported."); + exit(1); + } else { +@@ -2076,7 +2076,7 @@ static int kvm_init(MachineState *ms) + } + + if (machine_kernel_irqchip_allowed(ms)) { +- kvm_irqchip_create(ms, s); ++ kvm_irqchip_create(s); + } + + if (kvm_eventfds_allowed) { +@@ -2966,6 +2966,21 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + return false; + } + ++bool kvm_kernel_irqchip_allowed(void) ++{ ++ return machine_kernel_irqchip_allowed(current_machine); ++} ++ ++bool kvm_kernel_irqchip_required(void) ++{ ++ return machine_kernel_irqchip_required(current_machine); ++} ++ ++bool kvm_kernel_irqchip_split(void) ++{ ++ return machine_kernel_irqchip_split(current_machine); ++} ++ + static void kvm_accel_class_init(ObjectClass *oc, void *data) + { + AccelClass *ac = 
ACCEL_CLASS(oc); +diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c +index 91cd4c26f9..12b6a5b2a8 100644 +--- a/hw/ppc/e500.c ++++ b/hw/ppc/e500.c +@@ -793,7 +793,6 @@ static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, + MemoryRegion *ccsr, + IrqLines *irqs) + { +- MachineState *machine = MACHINE(pms); + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + DeviceState *dev = NULL; + SysBusDevice *s; +@@ -801,10 +800,10 @@ static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, + if (kvm_enabled()) { + Error *err = NULL; + +- if (machine_kernel_irqchip_allowed(machine)) { ++ if (kvm_kernel_irqchip_allowed()) { + dev = ppce500_init_mpic_kvm(pmc, irqs, &err); + } +- if (machine_kernel_irqchip_required(machine) && !dev) { ++ if (kvm_kernel_irqchip_required() && !dev) { + error_reportf_err(err, + "kernel_irqchip requested but unavailable: "); + exit(1); +diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c +index 9da423658a..f388d07bf9 100644 +--- a/hw/ppc/spapr_irq.c ++++ b/hw/ppc/spapr_irq.c +@@ -75,12 +75,11 @@ int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + uint32_t nr_servers, + Error **errp) + { +- MachineState *machine = MACHINE(qdev_get_machine()); + Error *local_err = NULL; + +- if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { ++ if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { + if (fn(intc, nr_servers, &local_err) < 0) { +- if (machine_kernel_irqchip_required(machine)) { ++ if (kvm_kernel_irqchip_required()) { + error_prepend(&local_err, + "kernel_irqchip requested but unavailable: "); + error_propagate(errp, local_err); +@@ -185,7 +184,7 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) + */ + if (kvm_enabled() && + spapr->irq == &spapr_irq_dual && +- machine_kernel_irqchip_required(machine) && ++ kvm_kernel_irqchip_required() && + xics_kvm_has_broken_disconnect(spapr)) { + error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on"); + return -1; +@@ -288,20 
+287,13 @@ uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) + + void spapr_irq_init(SpaprMachineState *spapr, Error **errp) + { +- MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + +- if (machine_kernel_irqchip_split(machine)) { ++ if (kvm_enabled() && kvm_kernel_irqchip_split()) { + error_setg(errp, "kernel_irqchip split mode not supported on pseries"); + return; + } + +- if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) { +- error_setg(errp, +- "kernel_irqchip requested but only available with KVM"); +- return; +- } +- + if (spapr_irq_check(spapr, errp) < 0) { + return; + } +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 9fe233b9bf..aaf2a502e8 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -519,10 +519,13 @@ void kvm_pc_gsi_handler(void *opaque, int n, int level); + void kvm_pc_setup_irq_routing(bool pci_enabled); + void kvm_init_irq_routing(KVMState *s); + ++bool kvm_kernel_irqchip_allowed(void); ++bool kvm_kernel_irqchip_required(void); ++bool kvm_kernel_irqchip_split(void); ++ + /** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer +- * @MachineState: The MachineState pointer + * + * Allow architectures to create an in-kernel irq chip themselves. 
+ * +@@ -530,7 +533,7 @@ void kvm_init_irq_routing(KVMState *s); + * 0: irq chip was not created + * > 0: irq chip was created + */ +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s); ++int kvm_arch_irqchip_create(KVMState *s); + + /** + * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 4be9497402..418bcedc3e 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -861,11 +861,11 @@ void kvm_arch_init_irq_routing(KVMState *s) + { + } + +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) ++int kvm_arch_irqchip_create(KVMState *s) + { +- if (machine_kernel_irqchip_split(ms)) { +- perror("-machine kernel_irqchip=split is not supported on ARM."); +- exit(1); ++ if (kvm_kernel_irqchip_split()) { ++ perror("-machine kernel_irqchip=split is not supported on ARM."); ++ exit(1); + } + + /* If we can create the VGIC using the newer device control API, we +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index fcc8f7d1f3..f5c17e0028 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -4532,10 +4532,10 @@ void kvm_arch_init_irq_routing(KVMState *s) + } + } + +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) ++int kvm_arch_irqchip_create(KVMState *s) + { + int ret; +- if (machine_kernel_irqchip_split(ms)) { ++ if (kvm_kernel_irqchip_split()) { + ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); + if (ret) { + error_report("Could not enable split irqchip mode: %s", +diff --git a/target/mips/kvm.c b/target/mips/kvm.c +index 578bc14625..de3e26ef1f 100644 +--- a/target/mips/kvm.c ++++ b/target/mips/kvm.c +@@ -57,7 +57,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + return 0; + } + +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) ++int kvm_arch_irqchip_create(KVMState *s) + { + return 0; + } +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index c77f9848ec..461dc6dae1 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ 
-152,7 +152,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + return 0; + } + +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) ++int kvm_arch_irqchip_create(KVMState *s) + { + return 0; + } +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 84d7cadd09..c589ef9034 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -386,7 +386,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + return 0; + } + +-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) ++int kvm_arch_irqchip_create(KVMState *s) + { + return 0; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch b/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch new file mode 100644 index 0000000..a50bff9 --- /dev/null +++ b/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch @@ -0,0 +1,241 @@ +From a4a984e67e276e643b8a51f39ca426d0967754a0 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 13 Jul 2020 14:24:51 -0400 +Subject: [PATCH 4/4] iotests/026: Move v3-exclusive test to new file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Max Reitz +Message-id: <20200713142451.289703-5-mreitz@redhat.com> +Patchwork-id: 97956 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 4/4] iotests/026: Move v3-exclusive test to new file +Bugzilla: 1807057 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +data_file does not work with v2, and we probably want 026 to keep +working for v2 images. Thus, open a new file for v3-exclusive error +path test cases. 
+ +Fixes: 81311255f217859413c94f2cd9cebf2684bbda94 + (“iotests/026: Test EIO on allocation in a data-file”) +Signed-off-by: Max Reitz +Message-Id: <20200311140707.1243218-1-mreitz@redhat.com> +Reviewed-by: John Snow +Tested-by: John Snow +Signed-off-by: Max Reitz +(cherry picked from commit c264e5d2f9f5d73977eac8e5d084f727b3d07ea9) + +Conflicts: + tests/qemu-iotests/group + - As per usual. + +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/026 | 31 ----------- + tests/qemu-iotests/026.out | 6 -- + tests/qemu-iotests/026.out.nocache | 6 -- + tests/qemu-iotests/289 | 89 ++++++++++++++++++++++++++++++ + tests/qemu-iotests/289.out | 8 +++ + tests/qemu-iotests/group | 1 + + 6 files changed, 98 insertions(+), 43 deletions(-) + create mode 100755 tests/qemu-iotests/289 + create mode 100644 tests/qemu-iotests/289.out + +diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 +index c1c96a41d9..3afd708863 100755 +--- a/tests/qemu-iotests/026 ++++ b/tests/qemu-iotests/026 +@@ -237,37 +237,6 @@ $QEMU_IO -c "write 0 $CLUSTER_SIZE" "$BLKDBG_TEST_IMG" | _filter_qemu_io + + _check_test_img + +-echo +-echo === Avoid freeing external data clusters on failure === +-echo +- +-# Similar test as the last one, except we test what happens when there +-# is an error when writing to an external data file instead of when +-# writing to a preallocated zero cluster +-_make_test_img -o "data_file=$TEST_IMG.data_file" $CLUSTER_SIZE +- +-# Put blkdebug above the data-file, and a raw node on top of that so +-# that blkdebug will see a write_aio event and emit an error +-$QEMU_IO -c "write 0 $CLUSTER_SIZE" \ +- "json:{ +- 'driver': 'qcow2', +- 'file': { 'driver': 'file', 'filename': '$TEST_IMG' }, +- 'data-file': { +- 'driver': 'raw', +- 'file': { +- 'driver': 'blkdebug', +- 'config': '$TEST_DIR/blkdebug.conf', +- 'image': { +- 'driver': 'file', +- 'filename': '$TEST_IMG.data_file' +- } +- } +- } +- }" \ +- | _filter_qemu_io +- +-_check_test_img +- + # 
success, all done + echo "*** done" + rm -f $seq.full +diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out +index c1b3b58482..83989996ff 100644 +--- a/tests/qemu-iotests/026.out ++++ b/tests/qemu-iotests/026.out +@@ -653,10 +653,4 @@ wrote 1024/1024 bytes at offset 0 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + write failed: Input/output error + No errors were found on the image. +- +-=== Avoid freeing external data clusters on failure === +- +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file +-write failed: Input/output error +-No errors were found on the image. + *** done +diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache +index 8d5001648a..9359d26d7e 100644 +--- a/tests/qemu-iotests/026.out.nocache ++++ b/tests/qemu-iotests/026.out.nocache +@@ -661,10 +661,4 @@ wrote 1024/1024 bytes at offset 0 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + write failed: Input/output error + No errors were found on the image. +- +-=== Avoid freeing external data clusters on failure === +- +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file +-write failed: Input/output error +-No errors were found on the image. + *** done +diff --git a/tests/qemu-iotests/289 b/tests/qemu-iotests/289 +new file mode 100755 +index 0000000000..1c11d4030e +--- /dev/null ++++ b/tests/qemu-iotests/289 +@@ -0,0 +1,89 @@ ++#!/usr/bin/env bash ++# ++# qcow2 v3-exclusive error path testing ++# (026 tests paths common to v2 and v3) ++# ++# Copyright (C) 2020 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++ rm "$TEST_DIR/blkdebug.conf" ++ rm -f "$TEST_IMG.data_file" ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++. ./common.pattern ++ ++_supported_fmt qcow2 ++_supported_proto file ++# This is a v3-exclusive test; ++# As for data_file, error paths often very much depend on whether ++# there is an external data file or not; so we create one exactly when ++# we want to test it ++_unsupported_imgopts 'compat=0.10' data_file ++ ++echo ++echo === Avoid freeing external data clusters on failure === ++echo ++ ++cat > "$TEST_DIR/blkdebug.conf" < +Date: Mon, 13 Jul 2020 14:24:50 -0400 +Subject: [PATCH 3/4] iotests/026: Test EIO on allocation in a data-file + +RH-Author: Max Reitz +Message-id: <20200713142451.289703-4-mreitz@redhat.com> +Patchwork-id: 97955 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/4] iotests/026: Test EIO on allocation in a data-file +Bugzilla: 1807057 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Test what happens when writing data to an external data file, where the +write requires an L2 entry to be allocated, but the data write fails. + +Signed-off-by: Max Reitz +Message-Id: <20200225143130.111267-4-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 81311255f217859413c94f2cd9cebf2684bbda94) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/026 | 32 ++++++++++++++++++++++++++++++ + tests/qemu-iotests/026.out | 6 ++++++ + tests/qemu-iotests/026.out.nocache | 6 ++++++ + 3 files changed, 44 insertions(+) + +diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 +index d89729697f..c1c96a41d9 100755 +--- a/tests/qemu-iotests/026 ++++ b/tests/qemu-iotests/026 +@@ -30,6 +30,7 @@ _cleanup() + { + _cleanup_test_img + rm "$TEST_DIR/blkdebug.conf" ++ rm -f "$TEST_IMG.data_file" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + +@@ -236,6 +237,37 @@ $QEMU_IO -c "write 0 $CLUSTER_SIZE" "$BLKDBG_TEST_IMG" | _filter_qemu_io + + _check_test_img + ++echo ++echo === Avoid freeing external data clusters on failure === ++echo ++ ++# Similar test as the last one, except we test what happens when there ++# is an error when writing to an external data file instead of when ++# writing to a preallocated zero cluster ++_make_test_img -o "data_file=$TEST_IMG.data_file" $CLUSTER_SIZE ++ ++# Put blkdebug above the data-file, and a raw node on top of that so ++# that blkdebug will see a write_aio event and emit an error ++$QEMU_IO -c "write 0 $CLUSTER_SIZE" \ ++ "json:{ ++ 'driver': 'qcow2', ++ 'file': { 'driver': 'file', 'filename': '$TEST_IMG' }, ++ 'data-file': { ++ 'driver': 'raw', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'config': '$TEST_DIR/blkdebug.conf', ++ 'image': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG.data_file' ++ } ++ } ++ } ++ }" \ ++ | _filter_qemu_io ++ ++_check_test_img ++ + # success, all done + echo "*** done" + rm -f $seq.full +diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out +index 83989996ff..c1b3b58482 100644 +--- a/tests/qemu-iotests/026.out ++++ b/tests/qemu-iotests/026.out +@@ -653,4 +653,10 @@ wrote 1024/1024 bytes at offset 0 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + write failed: Input/output error + No errors were found on the image. 
++ ++=== Avoid freeing external data clusters on failure === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file ++write failed: Input/output error ++No errors were found on the image. + *** done +diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache +index 9359d26d7e..8d5001648a 100644 +--- a/tests/qemu-iotests/026.out.nocache ++++ b/tests/qemu-iotests/026.out.nocache +@@ -661,4 +661,10 @@ wrote 1024/1024 bytes at offset 0 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + write failed: Input/output error + No errors were found on the image. ++ ++=== Avoid freeing external data clusters on failure === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file ++write failed: Input/output error ++No errors were found on the image. + *** done +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch b/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch new file mode 100644 index 0000000..36d609c --- /dev/null +++ b/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch @@ -0,0 +1,102 @@ +From b1035096f2d46e2146704d1db9581c6d2131d1f4 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 13 Jul 2020 14:24:49 -0400 +Subject: [PATCH 2/4] iotests/026: Test EIO on preallocated zero cluster + +RH-Author: Max Reitz +Message-id: <20200713142451.289703-3-mreitz@redhat.com> +Patchwork-id: 97953 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/4] iotests/026: Test EIO on preallocated zero cluster +Bugzilla: 1807057 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Test what happens when writing data to a preallocated zero cluster, but +the data write fails. 
+ +Signed-off-by: Max Reitz +Message-Id: <20200225143130.111267-3-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 31ab00f3747c00fdbb9027cea644b40dd1405480) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/026 | 21 +++++++++++++++++++++ + tests/qemu-iotests/026.out | 10 ++++++++++ + tests/qemu-iotests/026.out.nocache | 10 ++++++++++ + 3 files changed, 41 insertions(+) + +diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 +index 3430029ed6..d89729697f 100755 +--- a/tests/qemu-iotests/026 ++++ b/tests/qemu-iotests/026 +@@ -215,6 +215,27 @@ _make_test_img 64M + $QEMU_IO -c "write 0 1M" -c "write 0 1M" "$BLKDBG_TEST_IMG" | _filter_qemu_io + _check_test_img + ++echo ++echo === Avoid freeing preallocated zero clusters on failure === ++echo ++ ++cat > "$TEST_DIR/blkdebug.conf" < +Date: Wed, 3 Jun 2020 16:03:17 +0100 +Subject: [PATCH 18/26] iotests/055: refactor compressed backup to vmdk + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-4-kwolf@redhat.com> +Patchwork-id: 97104 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 03/11] iotests/055: refactor compressed backup to vmdk +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Vladimir Sementsov-Ogievskiy + +Instead of looping in each test, let's better refactor vmdk target case +as a subclass. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200430124713.3067-6-vsementsov@virtuozzo.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8e8372944e5e097e98844b4db10f867689065e16) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/055 | 70 ++++++++++++++++++++++++---------------------- + tests/qemu-iotests/055.out | 4 +-- + 2 files changed, 39 insertions(+), 35 deletions(-) + +diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 +index eb50c9f..8666601 100755 +--- a/tests/qemu-iotests/055 ++++ b/tests/qemu-iotests/055 +@@ -450,10 +450,9 @@ class TestSingleTransaction(iotests.QMPTestCase): + self.assert_no_active_block_jobs() + + +-class TestDriveCompression(iotests.QMPTestCase): ++class TestCompressedToQcow2(iotests.QMPTestCase): + image_len = 64 * 1024 * 1024 # MB +- fmt_supports_compression = [{'type': 'qcow2', 'args': ()}, +- {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}] ++ target_fmt = {'type': 'qcow2', 'args': ()} + + def tearDown(self): + self.vm.shutdown() +@@ -463,19 +462,20 @@ class TestDriveCompression(iotests.QMPTestCase): + except OSError: + pass + +- def do_prepare_drives(self, fmt, args, attach_target): ++ def do_prepare_drives(self, attach_target): + self.vm = iotests.VM().add_drive('blkdebug::' + test_img) + +- qemu_img('create', '-f', fmt, blockdev_target_img, +- str(TestDriveCompression.image_len), *args) ++ qemu_img('create', '-f', self.target_fmt['type'], blockdev_target_img, ++ str(self.image_len), *self.target_fmt['args']) + if attach_target: + self.vm.add_drive(blockdev_target_img, +- img_format=fmt, interface="none") ++ img_format=self.target_fmt['type'], ++ interface="none") + + self.vm.launch() + +- def do_test_compress_complete(self, cmd, format, attach_target, **args): +- self.do_prepare_drives(format['type'], format['args'], attach_target) ++ def do_test_compress_complete(self, cmd, attach_target, **args): ++ self.do_prepare_drives(attach_target) + + self.assert_no_active_block_jobs() + +@@ -486,21 +486,21 @@ class TestDriveCompression(iotests.QMPTestCase): + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, +- iotests.imgfmt, format['type']), ++ iotests.imgfmt, 
++ self.target_fmt['type']), + 'target image does not match source after backup') + + def test_complete_compress_drive_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_complete('drive-backup', format, False, +- target=blockdev_target_img, mode='existing') ++ self.do_test_compress_complete('drive-backup', False, ++ target=blockdev_target_img, ++ mode='existing') + + def test_complete_compress_blockdev_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_complete('blockdev-backup', format, True, +- target='drive1') ++ self.do_test_compress_complete('blockdev-backup', ++ True, target='drive1') + +- def do_test_compress_cancel(self, cmd, format, attach_target, **args): +- self.do_prepare_drives(format['type'], format['args'], attach_target) ++ def do_test_compress_cancel(self, cmd, attach_target, **args): ++ self.do_prepare_drives(attach_target) + + self.assert_no_active_block_jobs() + +@@ -514,17 +514,16 @@ class TestDriveCompression(iotests.QMPTestCase): + self.vm.shutdown() + + def test_compress_cancel_drive_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_cancel('drive-backup', format, False, +- target=blockdev_target_img, mode='existing') ++ self.do_test_compress_cancel('drive-backup', False, ++ target=blockdev_target_img, ++ mode='existing') + + def test_compress_cancel_blockdev_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_cancel('blockdev-backup', format, True, +- target='drive1') ++ self.do_test_compress_cancel('blockdev-backup', True, ++ target='drive1') + +- def do_test_compress_pause(self, cmd, format, attach_target, **args): +- self.do_prepare_drives(format['type'], format['args'], attach_target) ++ def do_test_compress_pause(self, cmd, attach_target, **args): ++ self.do_prepare_drives(attach_target) + + self.assert_no_active_block_jobs() + +@@ 
-550,18 +549,23 @@ class TestDriveCompression(iotests.QMPTestCase): + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, +- iotests.imgfmt, format['type']), ++ iotests.imgfmt, ++ self.target_fmt['type']), + 'target image does not match source after backup') + + def test_compress_pause_drive_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_pause('drive-backup', format, False, +- target=blockdev_target_img, mode='existing') ++ self.do_test_compress_pause('drive-backup', False, ++ target=blockdev_target_img, ++ mode='existing') + + def test_compress_pause_blockdev_backup(self): +- for format in TestDriveCompression.fmt_supports_compression: +- self.do_test_compress_pause('blockdev-backup', format, True, +- target='drive1') ++ self.do_test_compress_pause('blockdev-backup', True, ++ target='drive1') ++ ++ ++class TestCompressedToVmdk(TestCompressedToQcow2): ++ target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')} ++ + + if __name__ == '__main__': + iotests.main(supported_fmts=['raw', 'qcow2'], +diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out +index 5ce2f9a..5c26d15 100644 +--- a/tests/qemu-iotests/055.out ++++ b/tests/qemu-iotests/055.out +@@ -1,5 +1,5 @@ +-.............................. ++.................................... 
+ ---------------------------------------------------------------------- +-Ran 30 tests ++Ran 36 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch b/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch new file mode 100644 index 0000000..260d511 --- /dev/null +++ b/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch @@ -0,0 +1,45 @@ +From 9a0ca4797cbd029dab9209d88f8c81b78ded8fd0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:18 +0100 +Subject: [PATCH 19/26] iotests/055: skip vmdk target tests if vmdk is not + whitelisted + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-5-kwolf@redhat.com> +Patchwork-id: 97101 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 04/11] iotests/055: skip vmdk target tests if vmdk is not whitelisted +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200430124713.3067-7-vsementsov@virtuozzo.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 761cd2e791eae38c3d08ea5f83309ce58bb85ff7) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/055 | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 +index 8666601..c9cdc06 100755 +--- a/tests/qemu-iotests/055 ++++ b/tests/qemu-iotests/055 +@@ -566,6 +566,10 @@ class TestCompressedToQcow2(iotests.QMPTestCase): + class TestCompressedToVmdk(TestCompressedToQcow2): + target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')} + ++ @iotests.skip_if_unsupported(['vmdk']) ++ def setUp(self): ++ pass ++ + + if __name__ == '__main__': + iotests.main(supported_fmts=['raw', 'qcow2'], +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch b/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch new file mode 100644 index 0000000..c71bcba --- /dev/null +++ b/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch @@ -0,0 +1,387 @@ +From 2202321b549dda551190d919a5a1cbee0fab8c90 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:22 +0100 +Subject: [PATCH 23/26] iotests/109: Don't mirror with mismatched size + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-9-kwolf@redhat.com> +Patchwork-id: 97105 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 08/11] iotests/109: Don't mirror with mismatched size +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +This patch makes the raw image the same size as the file in a different +format that is mirrored as raw to it to avoid errors when mirror starts +to enforce that source and target are the same size. + +We check only that the first 512 bytes are zeroed (instead of 64k) +because some image formats create image files that are smaller than 64k, +so trying to read 64k would result in I/O errors. Apart from this, 512 +is more appropriate anyway because the raw format driver protects +specifically the first 512 bytes. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20200511135825.219437-2-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit ffa41a62d0b0e6d91f2071328befa046d56993e1) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/109 | 10 +++--- + tests/qemu-iotests/109.out | 74 +++++++++++++++++----------------------- + tests/qemu-iotests/common.filter | 5 +++ + 3 files changed, 41 insertions(+), 48 deletions(-) + +diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 +index 9897ceb..190c35e 100755 +--- a/tests/qemu-iotests/109 ++++ b/tests/qemu-iotests/109 +@@ -76,14 +76,14 @@ for fmt in qcow qcow2 qed vdi vmdk vpc; do + echo "=== Writing a $fmt header into raw ===" + echo + +- _make_test_img 64M + TEST_IMG="$TEST_IMG.src" IMGFMT=$fmt _make_test_img 64M ++ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size + + # This first test should fail: The image format was probed, we may not + # write an image header at the start of the image + run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_ERROR" | + _filter_block_job_len +- $QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io ++ $QEMU_IO -c 'read -P 0 0 512' "$TEST_IMG" | _filter_qemu_io + + + # When raw was explicitly specified, the same must succeed +@@ -102,12 +102,12 @@ for sample_img in empty.bochs iotest-dirtylog-10G-4M.vhdx parallels-v1 \ + + # Can't use _use_sample_img because that isn't designed to be used multiple + # times and it overwrites $TEST_IMG (both breaks cleanup) +- _make_test_img 64M + bzcat "$SAMPLE_IMG_DIR/$sample_img.bz2" > "$TEST_IMG.src" ++ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size + + run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_ERROR" | + _filter_block_job_offset | _filter_block_job_len +- $QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io ++ $QEMU_IO -c 'read -P 0 0 512' "$TEST_IMG" | 
_filter_qemu_io + + run_qemu "$TEST_IMG" "$TEST_IMG.src" "'format': 'raw'," "BLOCK_JOB_READY" + $QEMU_IMG compare -f raw -F raw "$TEST_IMG" "$TEST_IMG.src" +@@ -118,8 +118,8 @@ echo "=== Write legitimate MBR into raw ===" + echo + + for sample_img in grub_mbr.raw; do +- _make_test_img 64M + bzcat "$SAMPLE_IMG_DIR/$sample_img.bz2" > "$TEST_IMG.src" ++ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size + + run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_READY" + $QEMU_IMG compare -f raw -F raw "$TEST_IMG" "$TEST_IMG.src" +diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out +index 884f65f..ad739df 100644 +--- a/tests/qemu-iotests/109.out ++++ b/tests/qemu-iotests/109.out +@@ -2,8 +2,8 @@ QA output created by 109 + + === Writing a qcow header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -43,13 +43,12 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": 
{"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. + + === Writing a qcow2 header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -69,8 +68,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -89,13 +88,12 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", 
"id": "src"}} +-Warning: Image size mismatch! + Images are identical. + + === Writing a qed header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -115,8 +113,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -135,13 +133,12 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Writing a vdi header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -161,8 +158,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -181,13 +178,12 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Writing a vmdk header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -207,8 +203,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -227,13 +223,12 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Writing a vpc header into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -253,8 +248,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -273,12 +268,11 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Copying sample image empty.bochs into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -298,8 +292,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -318,12 +312,11 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -343,8 +336,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -363,12 +356,11 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Copying sample image parallels-v1 into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -388,8 +380,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -408,12 +400,11 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Copying sample image simple-pattern.cloop into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -433,8 +424,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"quit"} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-read 65536/65536 bytes at offset 0 +-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -453,12 +444,11 @@ read 65536/65536 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ + === Write legitimate MBR into raw === + +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} +@@ -480,7 +470,6 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. + { 'execute': 'qmp_capabilities' } + {"return": {}} +@@ -500,6 +489,5 @@ Images are identical. + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} +-Warning: Image size mismatch! + Images are identical. 
+ *** done +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index 5367dee..c8e8663 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -149,6 +149,11 @@ _filter_img_create() + -e "s# force_size=\\(on\\|off\\)##g" + } + ++_filter_img_create_size() ++{ ++ $SED -e "s# size=[0-9]\\+# size=SIZE#g" ++} ++ + _filter_img_info() + { + if [[ "$1" == "--format-specific" ]]; then +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch b/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch new file mode 100644 index 0000000..ef8807c --- /dev/null +++ b/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch @@ -0,0 +1,120 @@ +From 104c8f6210bf573cf39c2a14cdb0b081baaaa3f0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:23 +0100 +Subject: [PATCH 24/26] iotests/229: Use blkdebug to inject an error + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-10-kwolf@redhat.com> +Patchwork-id: 97108 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 09/11] iotests/229: Use blkdebug to inject an error +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +229 relies on the mirror running into an I/O error when the target is +smaller than the source. After changing mirror to catch this condition +while starting the job, this test case won't get a job that is paused +for an I/O error any more. Use blkdebug instead to inject an error. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Message-Id: <20200511135825.219437-3-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit d89ac3cf305b28c024a76805a84d75c0ee1e786f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/229 | 18 +++++++++++++----- + tests/qemu-iotests/229.out | 6 +++--- + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 +index e18a464..511fbc0 100755 +--- a/tests/qemu-iotests/229 ++++ b/tests/qemu-iotests/229 +@@ -32,6 +32,7 @@ _cleanup() + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_IMG" "$DEST_IMG" ++ rm -f "$TEST_DIR/blkdebug.conf" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + +@@ -48,11 +49,10 @@ _supported_os Linux + + DEST_IMG="$TEST_DIR/d.$IMGFMT" + TEST_IMG="$TEST_DIR/b.$IMGFMT" ++BLKDEBUG_CONF="$TEST_DIR/blkdebug.conf" + + _make_test_img 2M +- +-# destination for mirror will be too small, causing error +-TEST_IMG=$DEST_IMG _make_test_img 1M ++TEST_IMG=$DEST_IMG _make_test_img 2M + + $QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io + +@@ -66,11 +66,18 @@ echo + echo '=== Starting drive-mirror, causing error & stop ===' + echo + ++cat > "$BLKDEBUG_CONF" < +Date: Fri, 13 Mar 2020 12:34:38 +0000 +Subject: [PATCH 18/20] iotests: Add iothread cases to 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-13-kwolf@redhat.com> +Patchwork-id: 94289 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch adds test cases for attaching the backing chain to a mirror +job target right before finalising the job, where the image is in a +non-mainloop AioContext (i.e. the backing chain needs to be moved to the +AioContext of the mirror target). + +This requires switching the test case from virtio-blk to virtio-scsi +because virtio-blk only actually starts using the iothreads when the +guest driver initialises the device (which never happens in a test case +without a guest OS). virtio-scsi always keeps its block nodes in the +AioContext of the requested iothread without guest interaction. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-7-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 25 insertions(+), 11 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index 3053e50..b552d1f 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # chain opened right away. If False, blockdev-add + # opens it without a backing file and job completion + # is supposed to open the backing chain. ++# use_iothread: If True, an iothread is configured for the virtio-blk device ++# that uses the image being mirrored + + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None + target_open_with_backing = True ++ use_iothread = False + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): + 'file': {'driver': 'file', + 'filename': source_img}} + self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) +- self.vm.add_device('virtio-blk,id=qdev0,drive=source') ++ ++ if self.use_iothread: ++ self.vm.add_object('iothread,id=iothread0') ++ iothread = ",iothread=iothread0" ++ else: ++ iothread = "" ++ ++ self.vm.add_device('virtio-scsi%s' % iothread) ++ self.vm.add_device('scsi-hd,id=qdev0,drive=source') ++ + self.vm.launch() + + self.assertIntactSourceBackingChain() +@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): + def testFull(self): + self.runMirror('full') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + 
self.assertCorrectBackingImage(node, None) + self.assertIntactSourceBackingChain() + + def testTop(self): + self.runMirror('top') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, back2_img) + self.assertIntactSourceBackingChain() + + def testNone(self): + self.runMirror('none') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, source_img) + self.assertIntactSourceBackingChain() + +@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): + backing="backing") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): ++ use_iothread = True ++ + # Attach the backing chain only during completion, with blockdev-snapshot + class TestBlockdevMirrorSnapshot(MirrorBaseClass): + cmd = 'blockdev-mirror' +@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): + overlay="target") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): ++ use_iothread = True ++ + class TestCommit(BaseClass): + existing = False + +@@ -283,8 +298,7 @@ class TestCommit(BaseClass): + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + +- node = self.findBlockNode(None, +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode(None, 'qdev0') + self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', + back1_img) + self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', +diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out +index 4fd1c2d..ed714d5 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-......................... ++............................... 
+ ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 31 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch b/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch new file mode 100644 index 0000000..6bdf130 --- /dev/null +++ b/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch @@ -0,0 +1,236 @@ +From adda561394bb07c13ef3f2712b36704790530891 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:15 +0100 +Subject: [PATCH 16/26] iotests: Add more "skip_if_unsupported" statements to + the python tests + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-2-kwolf@redhat.com> +Patchwork-id: 97099 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 01/11] iotests: Add more "skip_if_unsupported" statements to the python tests +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Thomas Huth + +The python code already contains a possibility to skip tests if the +corresponding driver is not available in the qemu binary - use it +in more spots to avoid that the tests are failing if the driver has +been disabled. + +While we're at it, we can now also remove some of the old checks that +were using iotests.supports_quorum() - and which were apparently not +working as expected since the tests aborted instead of being skipped +when "quorum" was missing in the QEMU binary. + +Signed-off-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 9442bebe6e67a5d038bbf2572b79e7b59d202a23) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/030 | 4 +--- + tests/qemu-iotests/040 | 2 ++ + tests/qemu-iotests/041 | 39 +++------------------------------------ + tests/qemu-iotests/245 | 2 ++ + 4 files changed, 8 insertions(+), 39 deletions(-) + +diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 +index f3766f2..bddbb30 100755 +--- a/tests/qemu-iotests/030 ++++ b/tests/qemu-iotests/030 +@@ -530,6 +530,7 @@ class TestQuorum(iotests.QMPTestCase): + children = [] + backing = [] + ++ @iotests.skip_if_unsupported(['quorum']) + def setUp(self): + opts = ['driver=quorum', 'vote-threshold=2'] + +@@ -560,9 +561,6 @@ class TestQuorum(iotests.QMPTestCase): + os.remove(img) + + def test_stream_quorum(self): +- if not iotests.supports_quorum(): +- return +- + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.backing[0]), + 'image file map matches backing file before streaming') +diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 +index 762ad1e..74f62c3 100755 +--- a/tests/qemu-iotests/040 ++++ b/tests/qemu-iotests/040 +@@ -106,6 +106,7 @@ class TestSingleDrive(ImageCommitTestCase): + self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + ++ @iotests.skip_if_unsupported(['throttle']) + def test_commit_with_filter_and_quit(self): + result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg') + self.assert_qmp(result, 'return', {}) +@@ -125,6 +126,7 @@ class TestSingleDrive(ImageCommitTestCase): + self.has_quit = True + + # Same as above, but this time we add the filter after starting the job ++ @iotests.skip_if_unsupported(['throttle']) + def test_commit_plus_filter_and_quit(self): + result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg') + self.assert_qmp(result, 
'return', {}) +diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 +index 8568426..a543b15 100755 +--- a/tests/qemu-iotests/041 ++++ b/tests/qemu-iotests/041 +@@ -871,6 +871,7 @@ class TestRepairQuorum(iotests.QMPTestCase): + image_len = 1 * 1024 * 1024 # MB + IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ] + ++ @iotests.skip_if_unsupported(['quorum']) + def setUp(self): + self.vm = iotests.VM() + +@@ -891,9 +892,8 @@ class TestRepairQuorum(iotests.QMPTestCase): + #assemble the quorum block device from the individual files + args = { "driver": "quorum", "node-name": "quorum0", + "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } +- if iotests.supports_quorum(): +- result = self.vm.qmp("blockdev-add", **args) +- self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp("blockdev-add", **args) ++ self.assert_qmp(result, 'return', {}) + + + def tearDown(self): +@@ -906,9 +906,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + pass + + def test_complete(self): +- if not iotests.supports_quorum(): +- return +- + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', +@@ -925,9 +922,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + 'target image does not match source after mirroring') + + def test_cancel(self): +- if not iotests.supports_quorum(): +- return +- + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', +@@ -942,9 +936,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + self.vm.shutdown() + + def test_cancel_after_ready(self): +- if not iotests.supports_quorum(): +- return +- + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', +@@ -961,9 +952,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + 'target image does not match source after mirroring') + + def test_pause(self): +- if not iotests.supports_quorum(): +- return +- + self.assert_no_active_block_jobs() + + 
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', +@@ -989,9 +977,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + 'target image does not match source after mirroring') + + def test_medium_not_found(self): +- if not iotests.supports_quorum(): +- return +- + if iotests.qemu_default_machine != 'pc': + return + +@@ -1003,9 +988,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', replaces='img1', + mode='existing', target=quorum_repair_img, +@@ -1013,9 +995,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('drive-mirror', job_id='job0', + device='nonexistent', sync='full', + node_name='repair0', +@@ -1024,9 +1003,6 @@ class TestRepairQuorum(iotests.QMPTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_wrong_sync_mode(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0', + node_name='repair0', + replaces='img1', +@@ -1034,27 +1010,18 @@ class TestRepairQuorum(iotests.QMPTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_no_node_name(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_nonexistent_replaces(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', 
replaces='img77', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_after_a_quorum_snapshot(self): +- if not iotests.supports_quorum(): +- return +- + result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1', + snapshot_file=quorum_snapshot_file, + snapshot_node_name="snap1"); +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index 919131d..ed972f9 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -478,6 +478,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): + # This test verifies that we can't change the children of a block + # device during a reopen operation in a way that would create + # cycles in the node graph ++ @iotests.skip_if_unsupported(['blkverify']) + def test_graph_cycles(self): + opts = [] + +@@ -534,6 +535,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assert_qmp(result, 'return', {}) + + # Misc reopen tests with different block drivers ++ @iotests.skip_if_unsupported(['quorum', 'throttle']) + def test_misc_drivers(self): + #################### + ###### quorum ###### +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-qemu_io_log.patch b/SOURCES/kvm-iotests-Add-qemu_io_log.patch new file mode 100644 index 0000000..a65bc5a --- /dev/null +++ b/SOURCES/kvm-iotests-Add-qemu_io_log.patch @@ -0,0 +1,48 @@ +From 2be333e847c01397bb6a92b2e4c60e904957675d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:37 +0100 +Subject: [PATCH 09/17] iotests: Add qemu_io_log() + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-9-kwolf@redhat.com> +Patchwork-id: 97451 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 08/11] iotests: Add qemu_io_log() +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +Add a function that runs qemu-io and logs the output with the +appropriate filters applied. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit a96f0350e3d95c98f2bff1863d14493af5c1d360) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/iotests.py | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index be20d56..7a9c779 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -162,6 +162,11 @@ def qemu_io(*args): + sys.stderr.write('qemu-io received signal %i: %s\n' % (-exitcode, ' '.join(args))) + return subp.communicate()[0] + ++def qemu_io_log(*args): ++ result = qemu_io(*args) ++ log(result, filters=[filter_testfiles, filter_qemu_io]) ++ return result ++ + def qemu_io_silent(*args): + '''Run qemu-io and return the exit code, suppressing stdout''' + args = qemu_io_args + list(args) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch b/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch new file mode 100644 index 0000000..6144043 --- /dev/null +++ b/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch @@ -0,0 +1,253 @@ +From eccae2f252513d2965ef919022c3ed068da275bd Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:20 +0100 +Subject: [PATCH 15/26] iotests: Add test 291 to for qemu-img bitmap coverage + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-13-eblake@redhat.com> +Patchwork-id: 97079 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 12/12] iotests: Add test 291 to for qemu-img bitmap coverage +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Add a new test covering the 'qemu-img bitmap' subcommand, as well as +'qemu-img convert --bitmaps', both added in recent patches. 
+ +Signed-off-by: Eric Blake +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200521192137.1120211-6-eblake@redhat.com> +(cherry picked from commit cf2d1203dcfc2bf964453d83a2302231ce77f2dc) + +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + tests/qemu-iotests/group - context: other tests not backported + tests/qemu-iotests/291.out - zstd compression not backported +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/291 | 112 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/291.out | 78 +++++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 191 insertions(+) + create mode 100755 tests/qemu-iotests/291 + create mode 100644 tests/qemu-iotests/291.out + +diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 +new file mode 100755 +index 0000000..3ca83b9 +--- /dev/null ++++ b/tests/qemu-iotests/291 +@@ -0,0 +1,112 @@ ++#!/usr/bin/env bash ++# ++# Test qemu-img bitmap handling ++# ++# Copyright (C) 2018-2020 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++ nbd_server_stop ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++. 
./common.nbd ++ ++_supported_fmt qcow2 ++_supported_proto file ++_supported_os Linux ++_require_command QEMU_NBD ++ ++echo ++echo "=== Initial image setup ===" ++echo ++ ++# Create backing image with one bitmap ++TEST_IMG="$TEST_IMG.base" _make_test_img 10M ++$QEMU_IMG bitmap --add -f $IMGFMT "$TEST_IMG.base" b0 ++$QEMU_IO -c 'w 3M 1M' -f $IMGFMT "$TEST_IMG.base" | _filter_qemu_io ++ ++# Create initial image and populate two bitmaps: one active, one inactive. ++ORIG_IMG=$TEST_IMG ++TEST_IMG=$TEST_IMG.orig ++_make_test_img -b "$ORIG_IMG.base" -F $IMGFMT 10M ++$QEMU_IO -c 'w 0 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io ++$QEMU_IMG bitmap --add -g 512k -f $IMGFMT "$TEST_IMG" b1 ++$QEMU_IMG bitmap --add --disable -f $IMGFMT "$TEST_IMG" b2 ++$QEMU_IO -c 'w 3M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io ++$QEMU_IMG bitmap --clear -f $IMGFMT "$TEST_IMG" b1 ++$QEMU_IO -c 'w 1M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io ++$QEMU_IMG bitmap --disable -f $IMGFMT "$TEST_IMG" b1 ++$QEMU_IMG bitmap --enable -f $IMGFMT "$TEST_IMG" b2 ++$QEMU_IO -c 'w 2M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io ++ ++echo ++echo "=== Bitmap preservation not possible to non-qcow2 ===" ++echo ++ ++TEST_IMG=$ORIG_IMG ++$QEMU_IMG convert --bitmaps -O raw "$TEST_IMG.orig" "$TEST_IMG" && ++ echo "unexpected success" ++ ++echo ++echo "=== Convert with bitmap preservation ===" ++echo ++ ++# Only bitmaps from the active layer are copied ++$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG.orig" "$TEST_IMG" ++$QEMU_IMG info "$TEST_IMG" | _filter_img_info --format-specific ++# But we can also merge in bitmaps from other layers. 
This test is a bit ++# contrived to cover more code paths, in reality, you could merge directly ++# into b0 without going through tmp ++$QEMU_IMG bitmap --add --disable -f $IMGFMT "$TEST_IMG" b0 ++$QEMU_IMG bitmap --add --merge b0 -b "$TEST_IMG.base" -F $IMGFMT \ ++ -f $IMGFMT "$TEST_IMG" tmp ++$QEMU_IMG bitmap --merge tmp -f $IMGFMT "$TEST_IMG" b0 ++$QEMU_IMG bitmap --remove --image-opts \ ++ driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp ++$QEMU_IMG info "$TEST_IMG" | _filter_img_info --format-specific ++ ++echo ++echo "=== Check bitmap contents ===" ++echo ++ ++# x-dirty-bitmap is a hack for reading bitmaps; it abuses block status to ++# report "data":false for portions of the bitmap which are set ++IMG="driver=nbd,server.type=unix,server.path=$nbd_unix_socket" ++nbd_server_start_unix_socket -r -f qcow2 -B b0 "$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b0" | _filter_qemu_img_map ++nbd_server_start_unix_socket -r -f qcow2 -B b1 "$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b1" | _filter_qemu_img_map ++nbd_server_start_unix_socket -r -f qcow2 -B b2 "$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map ++ ++# success, all done ++echo '*** done' ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out +new file mode 100644 +index 0000000..14e5cfc +--- /dev/null ++++ b/tests/qemu-iotests/291.out +@@ -0,0 +1,78 @@ ++QA output created by 291 ++ ++=== Initial image setup === ++ ++Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=10485760 ++wrote 1048576/1048576 bytes at offset 3145728 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=10485760 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX 
YYY/sec and XXX ops/sec) ++wrote 1048576/1048576 bytes at offset 3145728 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 1048576/1048576 bytes at offset 1048576 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 1048576/1048576 bytes at offset 2097152 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++=== Bitmap preservation not possible to non-qcow2 === ++ ++qemu-img: Format driver 'raw' does not support bitmaps ++ ++=== Convert with bitmap preservation === ++ ++image: TEST_DIR/t.IMGFMT ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++disk size: 4.39 MiB ++Format specific information: ++ compat: 1.1 ++ lazy refcounts: false ++ bitmaps: ++ [0]: ++ flags: ++ name: b1 ++ granularity: 524288 ++ [1]: ++ flags: ++ [0]: auto ++ name: b2 ++ granularity: 65536 ++ refcount bits: 16 ++ corrupt: false ++image: TEST_DIR/t.IMGFMT ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++disk size: 4.48 MiB ++Format specific information: ++ compat: 1.1 ++ lazy refcounts: false ++ bitmaps: ++ [0]: ++ flags: ++ name: b1 ++ granularity: 524288 ++ [1]: ++ flags: ++ [0]: auto ++ name: b2 ++ granularity: 65536 ++ [2]: ++ flags: ++ name: b0 ++ granularity: 65536 ++ refcount bits: 16 ++ corrupt: false ++ ++=== Check bitmap contents === ++ ++[{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 3145728, "length": 1048576, "depth": 0, "zero": false, "data": false}, ++{ "start": 4194304, "length": 6291456, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++[{ "start": 0, "length": 1048576, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 1048576, "length": 1048576, "depth": 0, "zero": false, "data": false}, ++{ "start": 2097152, "length": 8388608, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 2097152, "length": 1048576, 
"depth": 0, "zero": false, "data": false}, ++{ "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 9c565cf..033b54d 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -290,3 +290,4 @@ + 280 rw migration quick + 281 rw quick + 284 rw ++291 rw quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch b/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch new file mode 100644 index 0000000..a8ea8f7 --- /dev/null +++ b/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch @@ -0,0 +1,138 @@ +From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:46 +0000 +Subject: [PATCH 05/20] iotests: Add test for image creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> +Patchwork-id: 94228 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-6-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +[mreitz: Added a note that NBD does not support resizing, which is why + the second case is expected to fail] +Signed-off-by: Max Reitz +(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/259.out | 14 +++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 77 insertions(+) + create mode 100755 tests/qemu-iotests/259 + create mode 100644 tests/qemu-iotests/259.out + +diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 +new file mode 100755 +index 0000000..62e29af +--- /dev/null ++++ b/tests/qemu-iotests/259 +@@ -0,0 +1,62 @@ ++#!/usr/bin/env bash ++# ++# Test generic image creation fallback (by using NBD) ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=mreitz@redhat.com ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt raw ++_supported_proto nbd ++_supported_os Linux ++ ++ ++_make_test_img 64M ++ ++echo ++echo '--- Testing creation ---' ++ ++$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create ++$QEMU_IMG info "$TEST_IMG" | _filter_img_info ++ ++echo ++echo '--- Testing creation for which the node would need to grow ---' ++ ++# NBD does not support resizing, so this will fail ++$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ ++ | _filter_img_create ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out +new file mode 100644 +index 0000000..ffed19c +--- /dev/null ++++ b/tests/qemu-iotests/259.out +@@ -0,0 +1,14 @@ ++QA output created by 259 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++ ++--- Testing creation --- ++Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 ++image: TEST_DIR/t.IMGFMT ++file format: qcow2 ++virtual size: 64 MiB (67108864 bytes) ++disk size: unavailable ++ ++--- Testing creation for which the node would need to grow --- ++qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize ++Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index c0e8197..e47cbfc 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -273,6 +273,7 @@ + 256 rw quick + 257 rw + 258 rw quick ++259 rw auto quick + 260 rw quick + 261 rw + 262 rw quick migration +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch b/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch new file mode 100644 index 0000000..4008413 --- /dev/null +++ b/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch @@ -0,0 +1,105 @@ +From 456c5e79c32e3f2f9319a7d1452fe523aded7835 Mon Sep 17 00:00:00 2001 +From: Kevin 
Wolf +Date: Wed, 3 Jun 2020 16:03:21 +0100 +Subject: [PATCH 22/26] iotests: Backup with different source/target size + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-8-kwolf@redhat.com> +Patchwork-id: 97106 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 07/11] iotests: Backup with different source/target size +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +This tests that the backup job catches situations where the target node +has a different size than the source node. It must also forbid resize +operations when the job is already running. + +Signed-off-by: Kevin Wolf +Message-Id: <20200430142755.315494-5-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 0a82a9273062d05764e3df3637b3aa95ad8291c6) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/055 | 42 ++++++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/055.out | 4 ++-- + 2 files changed, 42 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 +index c9cdc06..1c70389 100755 +--- a/tests/qemu-iotests/055 ++++ b/tests/qemu-iotests/055 +@@ -48,8 +48,10 @@ class TestSingleDrive(iotests.QMPTestCase): + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) + +- self.vm = iotests.VM().add_drive('blkdebug::' + test_img) +- self.vm.add_drive(blockdev_target_img, interface="none") ++ self.vm = iotests.VM() ++ self.vm.add_drive('blkdebug::' + test_img, 'node-name=source') ++ self.vm.add_drive(blockdev_target_img, 'node-name=target', ++ interface="none") + if iotests.qemu_default_machine == 'pc': + self.vm.add_drive(None, 'media=cdrom', 'ide') + self.vm.launch() +@@ -112,6 +114,42 @@ class TestSingleDrive(iotests.QMPTestCase): + def test_pause_blockdev_backup(self): + self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) + ++ def 
do_test_resize_blockdev_backup(self, device, node): ++ def pre_finalize(): ++ result = self.vm.qmp('block_resize', device=device, size=65536) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ result = self.vm.qmp('block_resize', node_name=node, size=65536) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ result = self.vm.qmp('blockdev-backup', job_id='job0', device='drive0', ++ target='drive1', sync='full', auto_finalize=False, ++ auto_dismiss=False) ++ self.assert_qmp(result, 'return', {}) ++ ++ self.vm.run_job('job0', auto_finalize=False, pre_finalize=pre_finalize, ++ use_log=False) ++ ++ def test_source_resize_blockdev_backup(self): ++ self.do_test_resize_blockdev_backup('drive0', 'source') ++ ++ def test_target_resize_blockdev_backup(self): ++ self.do_test_resize_blockdev_backup('drive1', 'target') ++ ++ def do_test_target_size(self, size): ++ result = self.vm.qmp('block_resize', device='drive1', size=size) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm.qmp('blockdev-backup', job_id='job0', device='drive0', ++ target='drive1', sync='full') ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ def test_small_target(self): ++ self.do_test_target_size(image_len // 2) ++ ++ def test_large_target(self): ++ self.do_test_target_size(image_len * 2) ++ + def test_medium_not_found(self): + if iotests.qemu_default_machine != 'pc': + return +diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out +index 5c26d15..0a5e958 100644 +--- a/tests/qemu-iotests/055.out ++++ b/tests/qemu-iotests/055.out +@@ -1,5 +1,5 @@ +-.................................... ++........................................ 
+ ---------------------------------------------------------------------- +-Ran 36 tests ++Ran 40 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch b/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch new file mode 100644 index 0000000..805b31a --- /dev/null +++ b/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch @@ -0,0 +1,59 @@ +From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:00 +0000 +Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-3-kwolf@redhat.com> +Patchwork-id: 93748 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +We have several almost identical copies of a blockdev_create() function +in different test cases. Time to create one unified function in +iotests.py. + +To keep the diff manageable, this patch only creates the function and +follow-up patches will convert the individual test cases. + +Signed-off-by: Kevin Wolf +(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3cff671..5741efb 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): + elif status == 'null': + return error + ++ # Returns None on success, and an error string on failure ++ def blockdev_create(self, options, job_id='job0', filters=None): ++ if filters is None: ++ filters = [filter_qmp_testfiles] ++ result = self.qmp_log('blockdev-create', filters=filters, ++ job_id=job_id, options=options) ++ ++ if 'return' in result: ++ assert result['return'] == {} ++ job_result = self.run_job(job_id) ++ else: ++ job_result = result['error'] ++ ++ log("") ++ return job_result ++ + def enable_migration_events(self, name): + log('Enabling migration QMP events on %s...' % name) + log(self.qmp('migrate-set-capabilities', capabilities=[ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch b/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch new file mode 100644 index 0000000..60c08ec --- /dev/null +++ b/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch @@ -0,0 +1,52 @@ +From 8dc8a17d4e98aae41db01cbc073e69de44291b63 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:38 +0100 +Subject: [PATCH 10/17] iotests: Filter testfiles out in filter_img_info() + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-10-kwolf@redhat.com> +Patchwork-id: 97455 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 09/11] iotests: Filter testfiles out in filter_img_info() +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +We want to keep TEST_IMG for the full path of the main test image, but +filter_testfiles() must be called for other test images before replacing +other things like the image 
format because the test directory path could +contain the format as a substring. + +Insert a filter_testfiles() call between both. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200424125448.63318-9-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit fd586ce8bee50d98773436214dc9e644ddda54aa) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/iotests.py | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 7a9c779..cd5df36 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -335,8 +335,9 @@ def filter_img_info(output, filename): + for line in output.split('\n'): + if 'disk size' in line or 'actual-size' in line: + continue +- line = line.replace(filename, 'TEST_IMG') \ +- .replace(imgfmt, 'IMGFMT') ++ line = line.replace(filename, 'TEST_IMG') ++ line = filter_testfiles(line) ++ line = line.replace(imgfmt, 'IMGFMT') + line = re.sub('iters: [0-9]+', 'iters: XXX', line) + line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line) + line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch b/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch new file mode 100644 index 0000000..b105fc2 --- /dev/null +++ b/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch @@ -0,0 +1,47 @@ +From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:35 +0000 +Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-10-kwolf@redhat.com> +Patchwork-id: 94284 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow 
+RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The 'job-complete' QMP command should be run with qmp() rather than +qmp_log() if use_log=False is passed. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-4-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/iotests.py | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 0c55f7b..46f880c 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): + if use_log: + log('Job failed: %s' % (j['error'])) + elif status == 'ready': +- self.qmp_log('job-complete', id=job) ++ if use_log: ++ self.qmp_log('job-complete', id=job) ++ else: ++ self.qmp('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Fix-test-178.patch b/SOURCES/kvm-iotests-Fix-test-178.patch new file mode 100644 index 0000000..5e54daa --- /dev/null +++ b/SOURCES/kvm-iotests-Fix-test-178.patch @@ -0,0 +1,59 @@ +From a04d324e41a40a6893bc94109994afc017f17192 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:16 +0100 +Subject: [PATCH 11/26] iotests: Fix test 178 + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-9-eblake@redhat.com> +Patchwork-id: 97075 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 08/12] iotests: Fix test 178 +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +A recent change to qemu-img changed expected error message output, but +178 takes long enough to execute that it does not get run by 'make +check' or './check -g quick'. 
+ +Fixes: 43d589b074 +Signed-off-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200521192137.1120211-2-eblake@redhat.com> +(cherry picked from commit ca01b7a641527052e3e8961845b40b81706ce5f9) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/178.out.qcow2 | 2 +- + tests/qemu-iotests/178.out.raw | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index 9e7d8c4..345eab3 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -13,7 +13,7 @@ qemu-img: Invalid option list: , + qemu-img: Invalid parameter 'snapshot.foo' + qemu-img: Failed in parsing snapshot param 'snapshot.foo' + qemu-img: --output must be used with human or json as argument. +-qemu-img: Image size must be less than 8 EiB! ++qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. + qemu-img: Unknown file format 'foo' + + == Size calculation for a new file (human) == +diff --git a/tests/qemu-iotests/178.out.raw b/tests/qemu-iotests/178.out.raw +index 6478365..15da915 100644 +--- a/tests/qemu-iotests/178.out.raw ++++ b/tests/qemu-iotests/178.out.raw +@@ -13,7 +13,7 @@ qemu-img: Invalid option list: , + qemu-img: Invalid parameter 'snapshot.foo' + qemu-img: Failed in parsing snapshot param 'snapshot.foo' + qemu-img: --output must be used with human or json as argument. +-qemu-img: Image size must be less than 8 EiB! ++qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. 
+ qemu-img: Unknown file format 'foo' + + == Size calculation for a new file (human) == +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch b/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch new file mode 100644 index 0000000..d24f5e7 --- /dev/null +++ b/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch @@ -0,0 +1,91 @@ +From 3c96dbd74fb67e2ae1a116b2771290b192041707 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:10 +0100 +Subject: [PATCH 05/26] iotests: Let _make_test_img parse its parameters + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-3-eblake@redhat.com> +Patchwork-id: 97070 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 02/12] iotests: Let _make_test_img parse its parameters +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +From: Max Reitz + +This will allow us to add more options than just -b. + +Signed-off-by: Max Reitz +Reviewed-by: Maxim Levitsky +Message-id: 20191107163708.833192-9-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit eea871d047701b563cfd66c1566b9ff6d163882b) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/common.rc | 28 ++++++++++++++++++++-------- + 1 file changed, 20 insertions(+), 8 deletions(-) + +diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc +index 0cc8acc..99fef4d 100644 +--- a/tests/qemu-iotests/common.rc ++++ b/tests/qemu-iotests/common.rc +@@ -302,12 +302,12 @@ _make_test_img() + # extra qemu-img options can be added by tests + # at least one argument (the image size) needs to be added + local extra_img_options="" +- local image_size=$* + local optstr="" + local img_name="" + local use_backing=0 + local backing_file="" + local object_options="" ++ local misc_params=() + + if [ -n "$TEST_IMG_FILE" ]; then + img_name=$TEST_IMG_FILE +@@ -323,11 +323,23 @@ _make_test_img() + optstr=$(_optstr_add "$optstr" "key-secret=keysec0") + fi + +- if [ "$1" = "-b" ]; then +- use_backing=1 +- backing_file=$2 +- image_size=$3 +- fi ++ for param; do ++ if [ "$use_backing" = "1" -a -z "$backing_file" ]; then ++ backing_file=$param ++ continue ++ fi ++ ++ case "$param" in ++ -b) ++ use_backing=1 ++ ;; ++ ++ *) ++ misc_params=("${misc_params[@]}" "$param") ++ ;; ++ esac ++ done ++ + if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then + optstr=$(_optstr_add "$optstr" "cluster_size=$CLUSTER_SIZE") + fi +@@ -343,9 +355,9 @@ _make_test_img() + # XXX(hch): have global image options? 
+ ( + if [ $use_backing = 1 ]; then +- $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1 ++ $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" "${misc_params[@]}" 2>&1 + else +- $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1 ++ $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options "$img_name" "${misc_params[@]}" 2>&1 + fi + ) | _filter_img_create + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch b/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch new file mode 100644 index 0000000..7757632 --- /dev/null +++ b/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch @@ -0,0 +1,110 @@ +From aff543186ff316d66b2c7acb434c6c17bdb8da78 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:25 +0100 +Subject: [PATCH 26/26] iotests: Mirror with different source/target size + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-12-kwolf@redhat.com> +Patchwork-id: 97109 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 11/11] iotests: Mirror with different source/target size +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +This tests that the mirror job catches situations where the target node +has a different size than the source node. It must also forbid resize +operations when the job is already running. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Message-Id: <20200511135825.219437-5-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 16cea4ee1c8e5a69a058e76f426b2e17974d8d7d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/041 | 45 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/041.out | 4 ++-- + 2 files changed, 47 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 +index a543b15..20fb68a 100755 +--- a/tests/qemu-iotests/041 ++++ b/tests/qemu-iotests/041 +@@ -240,6 +240,49 @@ class TestSingleBlockdev(TestSingleDrive): + target=self.qmp_target) + self.assert_qmp(result, 'error/class', 'GenericError') + ++ def do_test_resize(self, device, node): ++ def pre_finalize(): ++ if device: ++ result = self.vm.qmp('block_resize', device=device, size=65536) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ result = self.vm.qmp('block_resize', node_name=node, size=65536) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ result = self.vm.qmp(self.qmp_cmd, job_id='job0', device='drive0', ++ sync='full', target=self.qmp_target, ++ auto_finalize=False, auto_dismiss=False) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm.run_job('job0', auto_finalize=False, ++ pre_finalize=pre_finalize, use_log=False) ++ self.assertEqual(result, None) ++ ++ def test_source_resize(self): ++ self.do_test_resize('drive0', 'top') ++ ++ def test_target_resize(self): ++ self.do_test_resize(None, self.qmp_target) ++ ++ def do_test_target_size(self, size): ++ result = self.vm.qmp('block_resize', node_name=self.qmp_target, ++ size=size) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm.qmp(self.qmp_cmd, job_id='job0', ++ device='drive0', sync='full', auto_dismiss=False, ++ target=self.qmp_target) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm.run_job('job0', use_log=False) ++ self.assertEqual(result, 'Source and target image have different sizes') ++ ++ def test_small_target(self): ++ self.do_test_target_size(self.image_len // 2) ++ ++ def test_large_target(self): ++ self.do_test_target_size(self.image_len * 2) ++ + test_large_cluster = None + 
test_image_not_found = None + test_small_buffer2 = None +@@ -251,6 +294,8 @@ class TestSingleDriveZeroLength(TestSingleDrive): + + class TestSingleBlockdevZeroLength(TestSingleBlockdev): + image_len = 0 ++ test_small_target = None ++ test_large_target = None + + class TestSingleDriveUnalignedLength(TestSingleDrive): + image_len = 1025 * 1024 +diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out +index 2c448b4..3ea6aa4 100644 +--- a/tests/qemu-iotests/041.out ++++ b/tests/qemu-iotests/041.out +@@ -1,5 +1,5 @@ +-.......................................................................................... ++.................................................................................................... + ---------------------------------------------------------------------- +-Ran 90 tests ++Ran 100 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch new file mode 100644 index 0000000..17e4a41 --- /dev/null +++ b/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch @@ -0,0 +1,122 @@ +From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:30 +0000 +Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-5-kwolf@redhat.com> +Patchwork-id: 94281 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +We'll want to test more than one successful case in the future, so +prepare the test for that by a refactoring that runs each scenario in a +separate VM. 
+ +test_iothreads_switch_{backing,overlay} currently produce errors, but +these are cases that should actually work, by switching either the +backing file node or the overlay node to the AioContext of the other +node. + +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 39 insertions(+), 12 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index e66a23c..f69c2fa 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- # We don't allow setting a backing file that uses a different AioContext +- def test_iothreads(self): ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): + opts = hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) +@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): + result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') + self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', ++ iothread=iothread_a) + self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") +- +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', ++ iothread=iothread_b) + 
self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") ++ if iothread_a: ++ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd0', ++ share_rw=True, bus="scsi0.0") ++ self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') +- self.assert_qmp(result, 'return', {}) ++ if iothread_b: ++ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', ++ share_rw=True, bus="scsi1.0") ++ self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}) ++ # Attaching the backing file may or may not work ++ self.reopen(opts, {'backing': 'hd2'}, errmsg) ++ ++ # But removing the backing file should always work ++ self.reopen(opts, {'backing': None}) ++ ++ self.vm.shutdown() ++ ++ # We don't allow setting a backing file that uses a different AioContext if ++ # neither of them can switch to the other AioContext ++ def test_iothreads_error(self): ++ self.run_test_iothreads('iothread0', 'iothread1', ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_compatible_users(self): ++ self.run_test_iothreads('iothread0', 'iothread0') ++ ++ def test_iothreads_switch_backing(self): ++ self.run_test_iothreads('iothread0', None, ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_switch_overlay(self): ++ self.run_test_iothreads(None, 'iothread0', ++ "Cannot use a new backing file with a different AioContext") + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index a19de52..682b933 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -1,6 +1,6 @@ +-.................. ++..................... 
+ ---------------------------------------------------------------------- +-Ran 18 tests ++Ran 21 tests + + OK + {"execute": "job-finalize", "arguments": {"id": "commit0"}} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch b/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch new file mode 100644 index 0000000..08971a0 --- /dev/null +++ b/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch @@ -0,0 +1,46 @@ +From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:23:59 +0000 +Subject: [PATCH 02/18] iotests: Support job-complete in run_job() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-2-kwolf@redhat.com> +Patchwork-id: 93746 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Automatically complete jobs that have a 'ready' state and need an +explicit job-complete. Without this, run_job() would hang for such +jobs. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index df07089..3cff671 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): + error = j['error'] + if use_log: + log('Job failed: %s' % (j['error'])) ++ elif status == 'ready': ++ self.qmp_log('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch b/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch new file mode 100644 index 0000000..fbbaac6 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch @@ -0,0 +1,480 @@ +From e2a1b3fd32be8bb730656a6f22eb4f543b120c9d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:39 +0100 +Subject: [PATCH 11/17] iotests: Test committing to short backing file + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-11-kwolf@redhat.com> +Patchwork-id: 97453 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 10/11] iotests: Test committing to short backing file +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +Signed-off-by: Kevin Wolf +Message-Id: <20200424125448.63318-10-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit bf03dede475e29a16f9188ea85a4d77cd3dcf2b7) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/274 | 155 ++++++++++++++++++++++++++ + tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 424 insertions(+) + create mode 100755 tests/qemu-iotests/274 + create mode 100644 tests/qemu-iotests/274.out + +diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274 +new file mode 100755 +index 0000000..e951f72 +--- /dev/null ++++ b/tests/qemu-iotests/274 +@@ -0,0 +1,155 @@ ++#!/usr/bin/env python3 ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++# Creator/Owner: Kevin Wolf ++# ++# Some tests for short backing files and short overlays ++ ++import iotests ++ ++iotests.verify_image_format(supported_fmts=['qcow2']) ++iotests.verify_platform(['linux']) ++ ++size_short = 1 * 1024 * 1024 ++size_long = 2 * 1024 * 1024 ++size_diff = size_long - size_short ++ ++def create_chain() -> None: ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, ++ str(size_long)) ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid, ++ str(size_short)) ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top, ++ str(size_long)) ++ ++ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base) ++ ++def create_vm() -> iotests.VM: ++ vm = iotests.VM() ++ vm.add_blockdev('file,filename=%s,node-name=base-file' % base) ++ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt) ++ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid) ++ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base' ++ % iotests.imgfmt) ++ vm.add_drive(top, 'backing=mid,node-name=top') ++ return vm ++ ++with iotests.FilePath('base') as base, \ ++ iotests.FilePath('mid') as mid, \ ++ iotests.FilePath('top') as top: ++ ++ iotests.log('== Commit tests ==') ++ ++ create_chain() ++ ++ iotests.log('=== Check visible data ===') ++ ++ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top) ++ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top) ++ ++ iotests.log('=== Checking allocation status ===') ++ ++ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, ++ '-c', 'alloc %d %d' % (size_short, size_diff), ++ base) ++ ++ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, ++ '-c', 'alloc %d %d' % (size_short, size_diff), ++ mid) ++ ++ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, ++ '-c', 'alloc %d %d' % (size_short, size_diff), ++ top) ++ ++ iotests.log('=== Checking map ===') ++ ++ iotests.qemu_img_log('map', '--output=json', base) ++ iotests.qemu_img_log('map', 
'--output=human', base) ++ iotests.qemu_img_log('map', '--output=json', mid) ++ iotests.qemu_img_log('map', '--output=human', mid) ++ iotests.qemu_img_log('map', '--output=json', top) ++ iotests.qemu_img_log('map', '--output=human', top) ++ ++ iotests.log('=== Testing qemu-img commit (top -> mid) ===') ++ ++ iotests.qemu_img_log('commit', top) ++ iotests.img_info_log(mid) ++ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) ++ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) ++ ++ iotests.log('=== Testing HMP commit (top -> mid) ===') ++ ++ create_chain() ++ with create_vm() as vm: ++ vm.launch() ++ vm.qmp_log('human-monitor-command', command_line='commit drive0') ++ ++ iotests.img_info_log(mid) ++ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) ++ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) ++ ++ iotests.log('=== Testing QMP active commit (top -> mid) ===') ++ ++ create_chain() ++ with create_vm() as vm: ++ vm.launch() ++ vm.qmp_log('block-commit', device='top', base_node='mid', ++ job_id='job0', auto_dismiss=False) ++ vm.run_job('job0', wait=5) ++ ++ iotests.img_info_log(mid) ++ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) ++ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) ++ ++ ++ iotests.log('== Resize tests ==') ++ ++ # Use different sizes for different allocation modes: ++ # ++ # We want to have at least one test where 32 bit truncation in the size of ++ # the overlapping area becomes visible. This is covered by the ++ # prealloc='off' case (1G to 6G is an overlap of 5G). ++ # ++ # However, we can only do this for modes that don't preallocate data ++ # because otherwise we might run out of space on the test host. ++ # ++ # We also want to test some unaligned combinations. 
++ for (prealloc, base_size, top_size_old, top_size_new, off) in [ ++ ('off', '6G', '1G', '8G', '5G'), ++ ('metadata', '32G', '30G', '33G', '31G'), ++ ('falloc', '10M', '5M', '15M', '9M'), ++ ('full', '16M', '8M', '12M', '11M'), ++ ('off', '384k', '253k', '512k', '253k'), ++ ('off', '400k', '256k', '512k', '336k'), ++ ('off', '512k', '256k', '500k', '436k')]: ++ ++ iotests.log('=== preallocation=%s ===' % prealloc) ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size) ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top, ++ top_size_old) ++ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base) ++ ++ # After this, top_size_old to base_size should be allocated/zeroed. ++ # ++ # In theory, leaving base_size to top_size_new unallocated would be ++ # correct, but in practice, if we zero out anything, we zero out ++ # everything up to top_size_new. ++ iotests.qemu_img_log('resize', '-f', iotests.imgfmt, ++ '--preallocation', prealloc, top, top_size_new) ++ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top) ++ iotests.qemu_io_log('-c', 'map', top) ++ iotests.qemu_img_log('map', '--output=json', top) +diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out +new file mode 100644 +index 0000000..1a796fd +--- /dev/null ++++ b/tests/qemu-iotests/274.out +@@ -0,0 +1,268 @@ ++== Commit tests == ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 2097152/2097152 bytes at offset 0 ++2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++=== Check visible data === ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ 
++read 1048576/1048576 bytes at offset 1048576 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++=== Checking allocation status === ++1048576/1048576 bytes allocated at offset 0 bytes ++1048576/1048576 bytes allocated at offset 1 MiB ++ ++0/1048576 bytes allocated at offset 0 bytes ++0/0 bytes allocated at offset 1 MiB ++ ++0/1048576 bytes allocated at offset 0 bytes ++0/1048576 bytes allocated at offset 1 MiB ++ ++=== Checking map === ++[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}] ++ ++Offset Length Mapped to File ++0 0x200000 0x50000 TEST_DIR/PID-base ++ ++[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}] ++ ++Offset Length Mapped to File ++0 0x100000 0x50000 TEST_DIR/PID-base ++ ++[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680}, ++{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}] ++ ++Offset Length Mapped to File ++0 0x100000 0x50000 TEST_DIR/PID-base ++ ++=== Testing qemu-img commit (top -> mid) === ++Image committed. 
++ ++image: TEST_IMG ++file format: IMGFMT ++virtual size: 2 MiB (2097152 bytes) ++cluster_size: 65536 ++backing file: TEST_DIR/PID-base ++Format specific information: ++ compat: 1.1 ++ lazy refcounts: false ++ refcount bits: 16 ++ corrupt: false ++ ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++read 1048576/1048576 bytes at offset 1048576 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++=== Testing HMP commit (top -> mid) === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 2097152/2097152 bytes at offset 0 ++2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}} ++{"return": ""} ++image: TEST_IMG ++file format: IMGFMT ++virtual size: 2 MiB (2097152 bytes) ++cluster_size: 65536 ++backing file: TEST_DIR/PID-base ++Format specific information: ++ compat: 1.1 ++ lazy refcounts: false ++ refcount bits: 16 ++ corrupt: false ++ ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++read 1048576/1048576 bytes at offset 1048576 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++=== Testing QMP active commit (top -> mid) === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off 
refcount_bits=16 ++ ++wrote 2097152/2097152 bytes at offset 0 ++2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}} ++{"return": {}} ++{"execute": "job-complete", "arguments": {"id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++image: TEST_IMG ++file format: IMGFMT ++virtual size: 2 MiB (2097152 bytes) ++cluster_size: 65536 ++backing file: TEST_DIR/PID-base ++Format specific information: ++ compat: 1.1 ++ lazy refcounts: false ++ refcount bits: 16 ++ corrupt: false ++ ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++read 1048576/1048576 bytes at offset 1048576 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== Resize tests == ++=== preallocation=off === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 5368709120 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. 
++ ++read 65536/65536 bytes at offset 5368709120 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0) ++7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000) ++ ++[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false}, ++{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] ++ ++=== preallocation=metadata === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 33285996544 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. ++ ++read 65536/65536 bytes at offset 33285996544 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0) ++3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000) ++ ++[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false}, ++{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680}, ++{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128}, ++{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576}, ++{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024}, ++{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008}, ++{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] ++ ++=== preallocation=falloc === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 
'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 9437184 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. ++ ++read 65536/65536 bytes at offset 9437184 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0) ++10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) ++ ++[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, ++{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] ++ ++=== preallocation=full === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 11534336 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. ++ ++read 65536/65536 bytes at offset 11534336 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0) ++4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) ++ ++[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, ++{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] ++ ++=== preallocation=off === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 259072 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. 
++ ++read 65536/65536 bytes at offset 259072 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0) ++320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000) ++ ++[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false}, ++{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680}, ++{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] ++ ++=== preallocation=off === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 344064 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. ++ ++read 65536/65536 bytes at offset 344064 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) ++256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000) ++ ++[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, ++{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] ++ ++=== preallocation=off === ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++wrote 65536/65536 bytes at offset 446464 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++Image resized. 
++ ++read 65536/65536 bytes at offset 446464 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) ++244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000) ++ ++[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, ++{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}] ++ +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 033b54d..cddae00 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -286,6 +286,7 @@ + 270 rw backing quick + 272 rw + 273 backing quick ++274 rw backing + 277 rw quick + 280 rw migration quick + 281 rw quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch new file mode 100644 index 0000000..6fcb2f6 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch @@ -0,0 +1,189 @@ +From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:03 +0000 +Subject: [PATCH 06/18] iotests: Test external snapshot with VM state + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-6-kwolf@redhat.com> +Patchwork-id: 93752 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +This tests creating an external snapshot with VM state (which results in +an active overlay over an inactive backing file, which is also the root +node of an inactive BlockBackend), re-activating the images and +performing some operations to test that the re-activation worked as +intended. + +Signed-off-by: Kevin Wolf +(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 134 insertions(+) + create mode 100755 tests/qemu-iotests/280 + create mode 100644 tests/qemu-iotests/280.out + +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +new file mode 100755 +index 0000000..0b1fa8e +--- /dev/null ++++ b/tests/qemu-iotests/280 +@@ -0,0 +1,83 @@ ++#!/usr/bin/env python ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++# Creator/Owner: Kevin Wolf ++# ++# Test migration to file for taking an external snapshot with VM state. 
++ ++import iotests ++import os ++ ++iotests.verify_image_format(supported_fmts=['qcow2']) ++iotests.verify_protocol(supported=['file']) ++iotests.verify_platform(['linux']) ++ ++with iotests.FilePath('base') as base_path , \ ++ iotests.FilePath('top') as top_path, \ ++ iotests.VM() as vm: ++ ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') ++ ++ iotests.log('=== Launch VM ===') ++ vm.add_object('iothread,id=iothread0') ++ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) ++ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) ++ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') ++ vm.launch() ++ ++ vm.enable_migration_events('VM') ++ ++ iotests.log('\n=== Migrate to file ===') ++ vm.qmp_log('migrate', uri='exec:cat > /dev/null') ++ ++ with iotests.Timeout(3, 'Migration does not complete'): ++ vm.wait_migration() ++ ++ iotests.log('\nVM is now stopped:') ++ iotests.log(vm.qmp('query-migrate')['return']['status']) ++ vm.qmp_log('query-status') ++ ++ iotests.log('\n=== Create a snapshot of the disk image ===') ++ vm.blockdev_create({ ++ 'driver': 'file', ++ 'filename': top_path, ++ 'size': 0, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-file', ++ driver='file', filename=top_path, ++ filters=[iotests.filter_qmp_testfiles]) ++ ++ vm.blockdev_create({ ++ 'driver': iotests.imgfmt, ++ 'file': 'top-file', ++ 'size': 1024 * 1024, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-fmt', ++ driver=iotests.imgfmt, file='top-file') ++ ++ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') ++ ++ iotests.log('\n=== Resume the VM and simulate a write request ===') ++ vm.qmp_log('cont') ++ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) ++ ++ iotests.log('\n=== Commit it to the backing file ===') ++ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, ++ device='top-fmt', top_node='top-fmt', ++ filters=[iotests.filter_qmp_testfiles]) ++ 
if 'return' in result: ++ vm.run_job('job0') +diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out +new file mode 100644 +index 0000000..5d382fa +--- /dev/null ++++ b/tests/qemu-iotests/280.out +@@ -0,0 +1,50 @@ ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++=== Launch VM === ++Enabling migration QMP events on VM... ++{"return": {}} ++ ++=== Migrate to file === ++{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} ++{"return": {}} ++{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++ ++VM is now stopped: ++completed ++{"execute": "query-status", "arguments": {}} ++{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} ++ ++=== Create a snapshot of the disk image === ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} ++{"return": {}} ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} ++{"return": {}} ++{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} ++{"return": {}} ++ ++=== Resume the VM and simulate a write request === ++{"execute": 
"cont", "arguments": {}} ++{"return": {}} ++{"return": ""} ++ ++=== Commit it to the backing file === ++{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} ++{"return": {}} ++{"execute": "job-complete", "arguments": {"id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 06cc734..01301cd 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -286,3 +286,4 @@ + 272 rw + 273 backing quick + 277 rw quick ++280 rw migration quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch new file mode 100644 index 0000000..b09439b --- /dev/null +++ b/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch @@ -0,0 +1,322 @@ +From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 10:27:49 +0000 +Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some + blockdev actions + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-10-slp@redhat.com> +Patchwork-id: 93762 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Includes the following 
tests: + + - Adding a dirty bitmap. + * RHBZ: 1782175 + + - Starting a drive-mirror to an NBD-backed target. + * RHBZ: 1746217, 1773517 + + - Aborting an external snapshot transaction. + * RHBZ: 1779036 + + - Aborting a blockdev backup transaction. + * RHBZ: 1782111 + +For each one of them, a VM with a number of disks running in an +IOThread AioContext is used. + +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/281.out | 5 + + tests/qemu-iotests/group | 1 + + 3 files changed, 253 insertions(+) + create mode 100755 tests/qemu-iotests/281 + create mode 100644 tests/qemu-iotests/281.out + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +new file mode 100755 +index 0000000..269d583 +--- /dev/null ++++ b/tests/qemu-iotests/281 +@@ -0,0 +1,247 @@ ++#!/usr/bin/env python ++# ++# Test cases for blockdev + IOThread interactions ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import iotests ++from iotests import qemu_img ++ ++image_len = 64 * 1024 * 1024 ++ ++# Test for RHBZ#1782175 ++class TestDirtyBitmapIOThread(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ images = { 'drive0': drive0_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_add_dirty_bitmap(self): ++ result = self.vm.qmp( ++ 'block-dirty-bitmap-add', ++ node='drive0', ++ name='bitmap1', ++ persistent=True, ++ ) ++ ++ self.assert_qmp(result, 'return', {}) ++ ++ ++# Test for RHBZ#1746217 & RHBZ#1773517 ++class TestNBDMirrorIOThread(iotests.QMPTestCase): ++ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') ++ images = { 'drive0': drive0_img, 'mirror': mirror_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm_src = iotests.VM(path_suffix='src') ++ self.vm_src.add_object('iothread,id=iothread0') ++ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_src.launch() ++ self.vm_src.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ 
force=True) ++ ++ self.vm_tgt = iotests.VM(path_suffix='tgt') ++ self.vm_tgt.add_object('iothread,id=iothread0') ++ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.mirror_img)) ++ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_tgt.launch() ++ self.vm_tgt.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm_src.shutdown() ++ self.vm_tgt.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_nbd_mirror(self): ++ result = self.vm_tgt.qmp( ++ 'nbd-server-start', ++ addr={ ++ 'type': 'unix', ++ 'data': { 'path': self.nbd_sock } ++ } ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_tgt.qmp( ++ 'nbd-server-add', ++ device='drive0', ++ writable=True ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_src.qmp( ++ 'drive-mirror', ++ device='drive0', ++ target='nbd+unix:///drive0?socket=' + self.nbd_sock, ++ sync='full', ++ mode='existing', ++ speed=64*1024*1024, ++ job_id='j1' ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ self.vm_src.event_wait(name="BLOCK_JOB_READY") ++ ++ ++# Test for RHBZ#1779036 ++class TestExternalSnapshotAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') ++ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() 
++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_external_snapshot_abort(self): ++ # Use a two actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': self.snapshot_img, ++ 'snapshot-node-name': 'snap1', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': '/fakesnapshot', ++ 'snapshot-node-name': 'snap2', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ ]) ++ ++ # Crashes on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ ++# Test for RHBZ#1782111 ++class TestBlockdevBackupAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') ++ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') ++ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') ++ images = { 'drive0': drive0_img, ++ 'drive1': drive1_img, ++ 'snap0': snap0_img, ++ 'snap1': snap1_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_device('virtio-scsi,iothread=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.add_device('scsi-hd,drive=drive0') ++ self.vm.add_device('scsi-hd,drive=drive1') ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_blockdev_backup_abort(self): ++ # Use a two 
actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive0', ++ 'target': 'snap0', ++ 'sync': 'full', ++ 'job-id': 'j1' } ++ }, ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive1', ++ 'target': 'snap1', ++ 'sync': 'full' } ++ }, ++ ]) ++ ++ # Hangs on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++if __name__ == '__main__': ++ iotests.main(supported_fmts=['qcow2'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +new file mode 100644 +index 0000000..89968f3 +--- /dev/null ++++ b/tests/qemu-iotests/281.out +@@ -0,0 +1,5 @@ ++.... ++---------------------------------------------------------------------- ++Ran 4 tests ++ ++OK +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 01301cd..c0e8197 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -287,3 +287,4 @@ + 273 backing quick + 277 rw quick + 280 rw migration quick ++281 rw quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch new file mode 100644 index 0000000..58ef198 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch @@ -0,0 +1,162 @@ +From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:36 +0000 +Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target + backing file + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-11-kwolf@redhat.com> +Patchwork-id: 94288 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: 
Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The newly tested scenario is a common live storage migration scenario: +The target node is opened without a backing file so that the active +layer is mirrored while its backing chain can be copied in the +background. + +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client. + +This patch adds test cases for two ways to achieve the desired result, +using either x-blockdev-reopen or blockdev-snapshot. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-5-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 53 insertions(+), 7 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index d7ef257..3053e50 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # image during runtime, only makes sense if + # target_blockdev_backing is not None + # (None: same as target_backing) ++# target_open_with_backing: If True, the target image is added with its backing ++# chain opened right away. If False, blockdev-add ++# opens it without a backing file and job completion ++# is supposed to open the backing chain. 
+ + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None ++ target_open_with_backing = True + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): + options = { 'node-name': 'target', + 'driver': iotests.imgfmt, + 'file': { 'driver': 'file', ++ 'node-name': 'target-file', + 'filename': target_img } } +- if self.target_blockdev_backing: +- options['backing'] = self.target_blockdev_backing ++ ++ if not self.target_open_with_backing: ++ options['backing'] = None ++ elif self.target_blockdev_backing: ++ options['backing'] = self.target_blockdev_backing + + result = self.vm.qmp('blockdev-add', **options) + self.assert_qmp(result, 'return', {}) +@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): + # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror + + class MirrorBaseClass(BaseClass): ++ def openBacking(self): ++ pass ++ + def runMirror(self, sync): + if self.cmd == 'blockdev-mirror': + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', +- sync=sync, target='target') ++ sync=sync, target='target', ++ auto_finalize=False) + else: + if self.existing: + mode = 'existing' +@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', + sync=sync, target=target_img, + format=iotests.imgfmt, mode=mode, +- node_name='target') ++ node_name='target', auto_finalize=False) + + self.assert_qmp(result, 'return', {}) + +- self.complete_and_wait('mirror-job') ++ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, ++ pre_finalize=self.openBacking, auto_dismiss=True) + + def testFull(self): + self.runMirror('full') +@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): + target_blockdev_backing = { 'driver': 'null-co' } + target_real_backing = 'null-co://' + ++# Attach the backing chain only during 
completion, with blockdev-reopen ++class TestBlockdevMirrorReopen(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('x-blockdev-reopen', node_name="target", ++ driver=iotests.imgfmt, file="target-file", ++ backing="backing") ++ self.assert_qmp(result, 'return', {}) ++ ++# Attach the backing chain only during completion, with blockdev-snapshot ++class TestBlockdevMirrorSnapshot(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('blockdev-snapshot', node="backing", ++ overlay="target") ++ self.assert_qmp(result, 'return', {}) + + class TestCommit(BaseClass): + existing = False +diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out +index 4176bb9..4fd1c2d 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-................... ++......................... 
+ ---------------------------------------------------------------------- +-Ran 19 tests ++Ran 25 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch b/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch new file mode 100644 index 0000000..38b41be --- /dev/null +++ b/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch @@ -0,0 +1,50 @@ +From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:27 +0000 +Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-2-kwolf@redhat.com> +Patchwork-id: 94279 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Max Reitz + +This way, we get to see errors during the completion phase. + +Signed-off-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200218103454.296704-14-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/155 | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index e194859..d7ef257 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): + + self.assert_qmp(result, 'return', {}) + +- self.vm.event_wait('BLOCK_JOB_READY') +- +- result = self.vm.qmp('block-job-complete', device='mirror-job') +- self.assert_qmp(result, 'return', {}) +- +- self.vm.event_wait('BLOCK_JOB_COMPLETED') ++ self.complete_and_wait('mirror-job') + + def testFull(self): + self.runMirror('full') +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch b/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch new file mode 100644 index 0000000..f95e17a --- /dev/null +++ b/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch @@ -0,0 +1,81 @@ +From 127360c2fa0fefa18ff828bfec3985e04791d665 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:16 +0100 +Subject: [PATCH 17/26] iotests: don't use 'format' for drive_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-3-kwolf@redhat.com> +Patchwork-id: 97102 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 02/11] iotests: don't use 'format' for drive_add +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: John Snow + +It shadows (with a different type) the built-in format. +Use something else. + +Signed-off-by: John Snow +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Max Reitz +Message-Id: <20200331000014.11581-3-jsnow@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Max Reitz +(cherry picked from commit 1d3d4b630c6ea8b19420c097f0c448b6ded95072) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/055 | 3 ++- + tests/qemu-iotests/iotests.py | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 +index c732a11..eb50c9f 100755 +--- a/tests/qemu-iotests/055 ++++ b/tests/qemu-iotests/055 +@@ -469,7 +469,8 @@ class TestDriveCompression(iotests.QMPTestCase): + qemu_img('create', '-f', fmt, blockdev_target_img, + str(TestDriveCompression.image_len), *args) + if attach_target: +- self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") ++ self.vm.add_drive(blockdev_target_img, ++ img_format=fmt, interface="none") + + self.vm.launch() + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 46f880c..be20d56 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -481,20 +481,20 @@ class VM(qtest.QEMUQtestMachine): + self._args.append(opts) + return self + +- def add_drive(self, path, opts='', interface='virtio', format=imgfmt): ++ def add_drive(self, path, opts='', interface='virtio', img_format=imgfmt): + '''Add a virtio-blk drive to the VM''' + options = ['if=%s' % interface, + 'id=drive%d' % self._num_drives] + + if path is not None: + options.append('file=%s' % path) +- options.append('format=%s' % format) ++ options.append('format=%s' % img_format) + options.append('cache=%s' % cachemode) + + if opts: + options.append(opts) + +- if format == 'luks' and 'key-secret' not in opts: ++ if img_format == 'luks' and 'key-secret' not in opts: + # default luks support + if luks_default_secret_object not in self._args: + self.add_object(luks_default_secret_object) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch new file mode 100644 index 0000000..cda8037 --- /dev/null +++ b/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch @@ -0,0 +1,123 @@ +From 
d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:04 +0000 +Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-7-kwolf@redhat.com> +Patchwork-id: 93751 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +The "migration completed" event may be sent (on the source, to be +specific) before the migration is actually completed, so the VM runstate +will still be "finish-migrate" instead of "postmigrate". So ask the +users of VM.wait_migration() to specify the final runstate they desire +and then poll the VM until it has reached that state. (This should be +over very quickly, so busy polling is fine.) + +Without this patch, I see intermittent failures in the new iotest 280 +under high system load. I have not yet seen such failures with other +iotests that use VM.wait_migration() and query-status afterwards, but +maybe they just occur even more rarely, or it is because they also wait +on the destination VM to be running. + +Signed-off-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/234 | 8 ++++---- + tests/qemu-iotests/262 | 4 ++-- + tests/qemu-iotests/280 | 2 +- + tests/qemu-iotests/iotests.py | 6 +++++- + 4 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 +index 34c818c..59a7f94 100755 +--- a/tests/qemu-iotests/234 ++++ b/tests/qemu-iotests/234 +@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_b.wait_migration() ++ vm_b.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_a.wait_migration() ++ vm_a.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 +index 0963daa..bbcb526 100755 +--- a/tests/qemu-iotests/262 ++++ b/tests/qemu-iotests/262 +@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ 
vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +index 0b1fa8e..85e9114 100755 +--- a/tests/qemu-iotests/280 ++++ b/tests/qemu-iotests/280 +@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ + vm.qmp_log('migrate', uri='exec:cat > /dev/null') + + with iotests.Timeout(3, 'Migration does not complete'): +- vm.wait_migration() ++ vm.wait_migration('postmigrate') + + iotests.log('\nVM is now stopped:') + iotests.log(vm.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 5741efb..0c55f7b 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): + } + ])) + +- def wait_migration(self): ++ def wait_migration(self, expect_runstate): + while True: + event = self.event_wait('MIGRATION') + log(event, filters=[filter_qmp_event]) + if event['data']['status'] == 'completed': + break ++ # The event may occur in finish-migrate, so wait for the expected ++ # post-migration runstate ++ while self.qmp('query-status')['return']['status'] != expect_runstate: ++ pass + + def node_info(self, node_name): + nodes = self.qmp('query-named-block-nodes') +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch new file mode 100644 index 0000000..2ee9dcd --- /dev/null +++ b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch @@ -0,0 +1,79 @@ +From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Wed, 29 Jan 2020 13:45:18 +0000 +Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS + (CVE-2020-1711) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +RH-Author: jmaloy +Message-id: <20200129134518.1293-2-jmaloy@redhat.com> +Patchwork-id: 93571 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) +Bugzilla: 1794503 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Philippe Mathieu-Daudé + +From: Felipe Franciosi + +When querying an iSCSI server for the provisioning status of blocks (via +GET LBA STATUS), Qemu only validates that the response descriptor zero's +LBA matches the one requested. Given the SCSI spec allows servers to +respond with the status of blocks beyond the end of the LUN, Qemu may +have its heap corrupted by clearing/setting too many bits at the end of +its allocmap for the LUN. + +A malicious guest in control of the iSCSI server could carefully program +Qemu's heap (by selectively setting the bitmap) and then smash it. + +This limits the number of bits that iscsi_co_block_status() will try to +update in the allocmap so it can't overflow the bitmap. + +Fixes: CVE-2020-1711 +Cc: qemu-stable@nongnu.org +Signed-off-by: Felipe Franciosi +Signed-off-by: Peter Turschmid +Signed-off-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + block/iscsi.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index 2aea7e3..cbd5729 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; +- uint64_t lba; ++ uint64_t lba, max_bytes; + int ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); +@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + } + + lba = offset / iscsilun->block_size; ++ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; + + qemu_mutex_lock(&iscsilun->mutex); + retry: +@@ -764,7 +765,7 @@ retry: + goto out_unlock; + } + +- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; ++ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch new file mode 100644 index 0000000..a6d0baf --- /dev/null +++ b/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch @@ -0,0 +1,113 @@ +From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:45 +0000 +Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> +Patchwork-id: 94226 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. 
+ +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-5-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/iscsi.c | 56 -------------------------------------------------------- + 1 file changed, 56 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index cbd5729..b45da65 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + return 0; + } + +-static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int ret = 0; +- int64_t total_size = 0; +- BlockDriverState *bs; +- IscsiLun *iscsilun = NULL; +- QDict *bs_options; +- Error *local_err = NULL; +- +- bs = bdrv_new(); +- +- /* Read out options */ +- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- bs->opaque = g_new0(struct IscsiLun, 1); +- iscsilun = bs->opaque; +- +- bs_options = qdict_new(); +- iscsi_parse_filename(filename, bs_options, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- ret = -EINVAL; +- } else { +- ret = iscsi_open(bs, bs_options, 0, NULL); +- } +- qobject_unref(bs_options); +- +- if (ret != 0) { +- goto out; +- } +- iscsi_detach_aio_context(bs); +- if (iscsilun->type != TYPE_DISK) { +- ret = -ENODEV; +- goto out; +- } +- if (bs->total_sectors < total_size) { +- ret = -ENOSPC; +- goto out; +- } +- +- ret = 0; +-out: +- if (iscsilun->iscsi != NULL) { +- iscsi_destroy_context(iscsilun->iscsi); +- } +- g_free(bs->opaque); +- bs->opaque = NULL; +- bdrv_unref(bs); +- return ret; +-} +- + static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + { + IscsiLun *iscsilun = bs->opaque; +@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, 
+ .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch new file mode 100644 index 0000000..e38428b --- /dev/null +++ b/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch @@ -0,0 +1,213 @@ +From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:12 +0100 +Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-2-kwolf@redhat.com> +Patchwork-id: 94597 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All callers of job_txn_apply hold a single job's lock, but different +jobs within a transaction can have different contexts, thus we need to +lock each one individually before applying the callback function. + +Similar to job_completed_txn_abort this also requires releasing the +caller's context before and reacquiring it after to avoid recursive +locks which might break AIO_WAIT_WHILE in the callback. 
This is safe, since +existing code would already have to take this into account, lest +job_completed_txn_abort might have broken. + +This also brings to light a different issue: When a callback function in +job_txn_apply moves its job to a different AIO context, callers will +try to release the wrong lock (now that we re-acquire the lock +correctly, previously it would just continue with the old lock, leaving +the job unlocked for the rest of the return path). Fix this by not caching +the job's context. + +This is only necessary for qmp_block_job_finalize, qmp_job_finalize and +job_exit, since everyone else calls through job_exit. + +One test needed adapting, since it calls job_finalize directly, so it +manually needs to acquire the correct context. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 9 +++++++++ + job-qmp.c | 9 +++++++++ + job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- + tests/test-blockjob.c | 2 ++ + 4 files changed, 60 insertions(+), 10 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index c8d4b51..86eb115 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) + } + + trace_qmp_block_job_finalize(job); ++ job_ref(&job->job); + job_finalize(&job->job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. 
++ */ ++ aio_context = blk_get_aio_context(job->blk); ++ job_unref(&job->job); + aio_context_release(aio_context); + } + +diff --git a/job-qmp.c b/job-qmp.c +index fbfed25..a201220 100644 +--- a/job-qmp.c ++++ b/job-qmp.c +@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) + } + + trace_qmp_job_finalize(job); ++ job_ref(job); + job_finalize(job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. ++ */ ++ aio_context = job->aio_context; ++ job_unref(job); + aio_context_release(aio_context); + } + +diff --git a/job.c b/job.c +index 04409b4..48fc4ad 100644 +--- a/job.c ++++ b/job.c +@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) + } + } + +-static int job_txn_apply(JobTxn *txn, int fn(Job *)) ++static int job_txn_apply(Job *job, int fn(Job *)) + { +- Job *job, *next; ++ AioContext *inner_ctx; ++ Job *other_job, *next; ++ JobTxn *txn = job->txn; + int rc = 0; + +- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { +- rc = fn(job); ++ /* ++ * Similar to job_completed_txn_abort, we take each job's lock before ++ * applying fn, but since we assume that outer_ctx is held by the caller, ++ * we need to release it here to avoid holding the lock twice - which would ++ * break AIO_WAIT_WHILE from within fn. ++ */ ++ job_ref(job); ++ aio_context_release(job->aio_context); ++ ++ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { ++ inner_ctx = other_job->aio_context; ++ aio_context_acquire(inner_ctx); ++ rc = fn(other_job); ++ aio_context_release(inner_ctx); + if (rc) { + break; + } + } ++ ++ /* ++ * Note that job->aio_context might have been changed by calling fn, so we ++ * can't use a local variable to cache it. 
++ */ ++ aio_context_acquire(job->aio_context); ++ job_unref(job); + return rc; + } + +@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) + assert(job && job->txn); + + /* prepare the transaction to complete */ +- rc = job_txn_apply(job->txn, job_prepare); ++ rc = job_txn_apply(job, job_prepare); + if (rc) { + job_completed_txn_abort(job); + } else { +- job_txn_apply(job->txn, job_finalize_single); ++ job_txn_apply(job, job_finalize_single); + } + } + +@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) + assert(other_job->ret == 0); + } + +- job_txn_apply(txn, job_transition_to_pending); ++ job_txn_apply(job, job_transition_to_pending); + + /* If no jobs need manual finalization, automatically do so */ +- if (job_txn_apply(txn, job_needs_finalize) == 0) { ++ if (job_txn_apply(job, job_needs_finalize) == 0) { + job_do_finalize(job); + } + } +@@ -849,9 +870,10 @@ static void job_completed(Job *job) + static void job_exit(void *opaque) + { + Job *job = (Job *)opaque; +- AioContext *ctx = job->aio_context; ++ AioContext *ctx; + +- aio_context_acquire(ctx); ++ job_ref(job); ++ aio_context_acquire(job->aio_context); + + /* This is a lie, we're not quiescent, but still doing the completion + * callbacks. However, completion callbacks tend to involve operations that +@@ -862,6 +884,14 @@ static void job_exit(void *opaque) + + job_completed(job); + ++ /* ++ * Note that calling job_completed can move the job to a different ++ * aio_context, so we cannot cache from above. job_txn_apply takes care of ++ * acquiring the new lock, and we ref/unref to avoid job_completed freeing ++ * the job underneath us. 
++ */ ++ ctx = job->aio_context; ++ job_unref(job); + aio_context_release(ctx); + } + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index 7844c9f..6d857fd 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) + aio_poll(qemu_get_aio_context(), true); + assert(job->status == JOB_STATUS_PENDING); + ++ aio_context_acquire(job->aio_context); + job_finalize(job, &error_abort); ++ aio_context_release(job->aio_context); + assert(job->status == JOB_STATUS_CONCLUDED); + + cancel_common(s); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch new file mode 100644 index 0000000..e362efe --- /dev/null +++ b/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch @@ -0,0 +1,117 @@ +From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:09 +0100 +Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-95-dgilbert@redhat.com> +Patchwork-id: 93548 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +If a new setmemtable command comes in once the vhost threads are +running, it will remap the guests address space and the threads +will now be looking in the wrong place. + +Fortunately we're running this command under lock, so we can +update the queue mappings so that threads will look in the new-right +place. + +Note: This doesn't fix things that the threads might be doing +without a lock (e.g. a readv/writev!) That's for another time. 
+ +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index 63e4106..b89bf18 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) + } + + static bool ++map_ring(VuDev *dev, VuVirtq *vq) ++{ ++ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); ++ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); ++ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); ++ ++ DPRINT("Setting virtq addresses:\n"); ++ DPRINT(" vring_desc at %p\n", vq->vring.desc); ++ DPRINT(" vring_used at %p\n", vq->vring.used); ++ DPRINT(" vring_avail at %p\n", vq->vring.avail); ++ ++ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); ++} ++ ++static bool + vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) + { + int i; +@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) + close(vmsg->fds[i]); + } + ++ for (i = 0; i < dev->max_queues; i++) { ++ if (dev->vq[i].vring.desc) { ++ if (map_ring(dev, &dev->vq[i])) { ++ vu_panic(dev, "remaping queue %d during setmemtable", i); ++ } ++ } ++ } ++ + return false; + } + +@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) + DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); + DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); + ++ vq->vra = *vra; + vq->vring.flags = vra->flags; +- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); +- vq->vring.used = qva_to_va(dev, vra->used_user_addr); +- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); + 
vq->vring.log_guest_addr = vra->log_guest_addr; + +- DPRINT("Setting virtq addresses:\n"); +- DPRINT(" vring_desc at %p\n", vq->vring.desc); +- DPRINT(" vring_used at %p\n", vq->vring.used); +- DPRINT(" vring_avail at %p\n", vq->vring.avail); + +- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { ++ if (map_ring(dev, vq)) { + vu_panic(dev, "Invalid vring_addr message"); + return false; + } +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 1844b6f..5cb7708 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -327,6 +327,9 @@ typedef struct VuVirtq { + int err_fd; + unsigned int enable; + bool started; ++ ++ /* Guest addresses of our ring */ ++ struct vhost_vring_addr vra; + } VuVirtq; + + enum VuWatchCondtion { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch b/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch new file mode 100644 index 0000000..4eb95bf --- /dev/null +++ b/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch @@ -0,0 +1,77 @@ +From 1da0eecb9f2086c880fdaf1260ae775bbfbf5f02 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:37 -0400 +Subject: [PATCH 03/12] linux-headers: support vfio-ccw features + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-4-cohuck@redhat.com> +Patchwork-id: 97696 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/9] linux-headers: support vfio-ccw features +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +Partial update to support CRW and SCHIB regions. + +Upstream: n/a + +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. 
de Paula +--- + linux-headers/linux/vfio.h | 3 +++ + linux-headers/linux/vfio_ccw.h | 19 +++++++++++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index fb10370d29..9e227348b3 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -378,6 +378,8 @@ struct vfio_region_gfx_edid { + + /* sub-types for VFIO_REGION_TYPE_CCW */ + #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) ++#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) ++#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + + /* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped +@@ -577,6 +579,7 @@ enum { + + enum { + VFIO_CCW_IO_IRQ_INDEX, ++ VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_NUM_IRQS + }; + +diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h +index fcc3e69ef5..6375d6ff25 100644 +--- a/linux-headers/linux/vfio_ccw.h ++++ b/linux-headers/linux/vfio_ccw.h +@@ -34,4 +34,23 @@ struct ccw_cmd_region { + __u32 ret_code; + } __attribute__((packed)); + ++/* ++ * Used for processing commands that read the subchannel-information block ++ * Reading this region triggers a stsch() to hardware ++ * Note: this is controlled by a capability ++ */ ++struct ccw_schib_region { ++#define SCHIB_AREA_SIZE 52 ++ __u8 schib_area[SCHIB_AREA_SIZE]; ++} __attribute__((packed)); ++ ++/* ++ * Used for returning a Channel Report Word to userspace. 
++ * Note: this is controlled by a capability ++ */ ++struct ccw_crw_region { ++ __u32 crw; ++ __u32 pad; ++} __attribute__((packed)); ++ + #endif +-- +2.27.0 + diff --git a/SOURCES/kvm-linux-headers-update-kvm.h.patch b/SOURCES/kvm-linux-headers-update-kvm.h.patch new file mode 100644 index 0000000..1834e33 --- /dev/null +++ b/SOURCES/kvm-linux-headers-update-kvm.h.patch @@ -0,0 +1,119 @@ +From 9d1b94d3739567245578f30866facc13edb3be92 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:44 -0400 +Subject: [PATCH 02/42] linux-headers: update kvm.h + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-3-thuth@redhat.com> +Patchwork-id: 97020 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 02/38] linux-headers: update kvm.h +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Upstream-status: n/a + +Update kvm.h for the upcoming new s390x reset and protected virtualization +ioctls. This patch is based on commit ddda37483dd17c9936fdde9ebf8f6ca2692b3842 +and commit dc6f8d458a4ccc360723993f31d310d06469f55f, but I dropped all +(unrequired) changes to the other linux-header files. + +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + linux-headers/linux/kvm.h | 55 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 53 insertions(+), 2 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 3d9b18f7f8..578cd97c0d 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -468,12 +468,17 @@ struct kvm_s390_mem_op { + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ +- __u8 ar; /* the access register number */ +- __u8 reserved[31]; /* should be set to 0 */ ++ union { ++ __u8 ar; /* the access register number */ ++ __u32 sida_offset; /* offset into the sida */ ++ __u8 reserved[32]; /* should be set to 0 */ ++ }; + }; + /* types for kvm_s390_mem_op->op */ + #define KVM_S390_MEMOP_LOGICAL_READ 0 + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 ++#define KVM_S390_MEMOP_SIDA_READ 2 ++#define KVM_S390_MEMOP_SIDA_WRITE 3 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +@@ -1000,6 +1005,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_PMU_EVENT_FILTER 173 + #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 + #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 ++#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 ++#define KVM_CAP_ARM_NISV_TO_USER 177 ++#define KVM_CAP_ARM_INJECT_EXT_DABT 178 ++#define KVM_CAP_S390_VCPU_RESETS 179 ++#define KVM_CAP_S390_PROTECTED 180 ++#define KVM_CAP_PPC_SECURE_GUEST 181 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1461,6 +1472,43 @@ struct kvm_enc_region { + /* Available with KVM_CAP_ARM_SVE */ + #define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + ++/* Available with KVM_CAP_S390_VCPU_RESETS */ ++#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) ++#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) ++ ++struct kvm_s390_pv_sec_parm { ++ __u64 origin; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_unp { ++ __u64 addr; ++ __u64 size; ++ __u64 tweak; ++}; ++ ++enum pv_cmd_id { ++ 
KVM_PV_ENABLE, ++ KVM_PV_DISABLE, ++ KVM_PV_SET_SEC_PARMS, ++ KVM_PV_UNPACK, ++ KVM_PV_VERIFY, ++ KVM_PV_PREP_RESET, ++ KVM_PV_UNSHARE_ALL, ++}; ++ ++struct kvm_pv_cmd { ++ __u32 cmd; /* Command to be executed */ ++ __u16 rc; /* Ultravisor return code */ ++ __u16 rrc; /* Ultravisor return reason code */ ++ __u64 data; /* Data or address */ ++ __u32 flags; /* flags for future extensions. Must be 0 for now */ ++ __u32 reserved[3]; ++}; ++ ++/* Available with KVM_CAP_S390_PROTECTED */ ++#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) ++ + /* Secure Encrypted Virtualization command */ + enum sev_cmd_id { + /* Guest initialization commands */ +@@ -1611,4 +1659,7 @@ struct kvm_hyperv_eventfd { + #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff + #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + ++#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) ++#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) ++ + #endif /* __LINUX_KVM_H */ +-- +2.27.0 + diff --git a/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch b/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch new file mode 100644 index 0000000..3477af5 --- /dev/null +++ b/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch @@ -0,0 +1,179 @@ +From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:56 +0000 +Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into + uint32_t + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-3-peterx@redhat.com> +Patchwork-id: 93629 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +It was always used as 32bit, so define it as used to be clear. +Instead of using -1 as the auto-gen magic value, we switch to +UINT32_MAX. 
We also make sure that we don't auto-gen this value to +avoid overflowed instance IDs without being noticed. + +Suggested-by: Juan Quintela +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + include/migration/vmstate.h + migration/savevm.c + stubs/vmstate.c + Due to missing 3cad405bab ("vmstate: replace DeviceState with + VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/intc/apic_common.c | 2 +- + include/migration/register.h | 2 +- + include/migration/vmstate.h | 2 +- + migration/savevm.c | 18 ++++++++++-------- + stubs/vmstate.c | 2 +- + 5 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index f2c3a7f..54b8731 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- int instance_id = s->id; ++ uint32_t instance_id = s->id; + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +diff --git a/include/migration/register.h b/include/migration/register.h +index a13359a..f3ba10b 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque); +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 883f1cf..296609c 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + #define VMSTATE_INSTANCE_ID_ANY -1 + + /* Returns: 0 on success, 
-1 on failure */ +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +diff --git a/migration/savevm.c b/migration/savevm.c +index e2e8e0a..a80bb52 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -233,7 +233,7 @@ typedef struct CompatEntry { + typedef struct SaveStateEntry { + QTAILQ_ENTRY(SaveStateEntry) entry; + char idstr[256]; +- int instance_id; ++ uint32_t instance_id; + int alias_id; + int version_id; + /* version id read from the stream */ +@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) + fclose(out_file); + } + +-static int calculate_new_instance_id(const char *idstr) ++static uint32_t calculate_new_instance_id(const char *idstr) + { + SaveStateEntry *se; +- int instance_id = 0; ++ uint32_t instance_id = 0; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (strcmp(idstr, se->idstr) == 0 +@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) + instance_id = se->instance_id + 1; + } + } ++ /* Make sure we never loop over without being noticed */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + return instance_id; + } + +@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + Meanwhile pass -1 as instance_id if you do not already have a clearly + distinguishing id for all instances of your device class. 
*/ + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque) +@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) + } + } + +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *opaque, int alias_id, + int required_for_version, +@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) + return qemu_file_get_error(f); + } + +-static SaveStateEntry *find_se(const char *idstr, int instance_id) ++static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) + { + SaveStateEntry *se; + +@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + /* Find savevm section */ + se = find_se(idstr, instance_id); + if (se == NULL) { +- error_report("Unknown savevm section or instance '%s' %d. " ++ error_report("Unknown savevm section or instance '%s' %"PRIu32". 
" + "Make sure that your current VM setup matches your " + "saved VM setup, including any hotplugged devices", + idstr, instance_id); +@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + + ret = vmstate_load(f, se); + if (ret < 0) { +- error_report("error while loading state for instance 0x%x of" ++ error_report("error while loading state for instance 0x%"PRIx32" of" + " device '%s'", instance_id, idstr); + return ret; + } +diff --git a/stubs/vmstate.c b/stubs/vmstate.c +index e1e89b8..4ed5cc6 100644 +--- a/stubs/vmstate.c ++++ b/stubs/vmstate.c +@@ -4,7 +4,7 @@ + const VMStateDescription vmstate_dummy = {}; + + int vmstate_register_with_alias_id(DeviceState *dev, +- int instance_id, ++ uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Create-migration_is_running.patch b/SOURCES/kvm-migration-Create-migration_is_running.patch new file mode 100644 index 0000000..c9593de --- /dev/null +++ b/SOURCES/kvm-migration-Create-migration_is_running.patch @@ -0,0 +1,119 @@ +From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:41 +0000 +Subject: [PATCH 09/18] migration: Create migration_is_running() + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-9-quintela@redhat.com> +Patchwork-id: 94115 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +This function returns true if we are in the middle of a migration. +It is like migration_is_setup_or_active() with CANCELLING and COLO. +Adapt all callers that are needed. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 29 ++++++++++++++++++++++++----- + migration/migration.h | 1 + + migration/savevm.c | 4 +--- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 30c53c6..eb50d77 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) + } + } + ++bool migration_is_running(int state) ++{ ++ switch (state) { ++ case MIGRATION_STATUS_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_PAUSED: ++ case MIGRATION_STATUS_POSTCOPY_RECOVER: ++ case MIGRATION_STATUS_SETUP: ++ case MIGRATION_STATUS_PRE_SWITCHOVER: ++ case MIGRATION_STATUS_DEVICE: ++ case MIGRATION_STATUS_WAIT_UNPLUG: ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_COLO: ++ return true; ++ ++ default: ++ return false; ++ ++ } ++} ++ + static void populate_time_info(MigrationInfo *info, MigrationState *s) + { + info->has_status = true; +@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + MigrationCapabilityStatusList *cap; + bool cap_list[MIGRATION_CAPABILITY__MAX]; + +- if (migration_is_setup_or_active(s->state)) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } +@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) + + do { + old_state = s->state; +- if (!migration_is_setup_or_active(old_state)) { ++ if (!migration_is_running(old_state)) { + break; + } + /* If the migration is paused, kick it out of the pause */ +@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + return true; + } + +- if (migration_is_setup_or_active(s->state) || +- s->state == MIGRATION_STATUS_CANCELLING || +- s->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return false; + } +diff --git 
a/migration/migration.h b/migration/migration.h +index 0b1b0d4..a2b2336 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_fd_connect(MigrationState *s, Error *error_in); + + bool migration_is_setup_or_active(int state); ++bool migration_is_running(int state); + + void migrate_init(MigrationState *s); + bool migration_is_blocked(Error **errp); +diff --git a/migration/savevm.c b/migration/savevm.c +index a80bb52..144ecf0 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + MigrationState *ms = migrate_get_current(); + MigrationStatus status; + +- if (migration_is_setup_or_active(ms->state) || +- ms->state == MIGRATION_STATUS_CANCELLING || +- ms->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(ms->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch new file mode 100644 index 0000000..c2ead53 --- /dev/null +++ b/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch @@ -0,0 +1,257 @@ +From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:55 +0000 +Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-2-peterx@redhat.com> +Patchwork-id: 93630 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to +auto-generate the vmstate instance ID. Previously it was hard coded +as -1 instead of this macro. 
It helps to change this default value in +the follow up patches. No functional change. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + backends/dbus-vmstate.c + File deleted + hw/core/qdev.c + hw/misc/max111x.c + hw/net/eepro100.c + Due to missing commit 3cad405bab ("vmstate: replace + DeviceState with VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/stellaris.c | 2 +- + hw/core/qdev.c | 3 ++- + hw/display/ads7846.c | 2 +- + hw/i2c/core.c | 2 +- + hw/input/stellaris_input.c | 3 ++- + hw/intc/apic_common.c | 2 +- + hw/misc/max111x.c | 2 +- + hw/net/eepro100.c | 2 +- + hw/pci/pci.c | 2 +- + hw/ppc/spapr.c | 2 +- + hw/timer/arm_timer.c | 2 +- + hw/tpm/tpm_emulator.c | 3 ++- + include/migration/vmstate.h | 2 ++ + migration/savevm.c | 8 ++++---- + 14 files changed, 21 insertions(+), 16 deletions(-) + +diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c +index b198066..bb025e0 100644 +--- a/hw/arm/stellaris.c ++++ b/hw/arm/stellaris.c +@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, + memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); + memory_region_add_subregion(get_system_memory(), base, &s->iomem); + ssys_reset(s); +- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); + return 0; + } + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index cf1ba28..40f6b2b 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) + dev->canonical_path = object_get_canonical_path(OBJECT(dev)); + + if (qdev_get_vmsd(dev)) { +- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, ++ if (vmstate_register_with_alias_id(dev, 
VMSTATE_INSTANCE_ID_ANY, ++ qdev_get_vmsd(dev), dev, + dev->instance_id_alias, + dev->alias_required_for_version, + &local_err) < 0) { +diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c +index c12272a..9228b40 100644 +--- a/hw/display/ads7846.c ++++ b/hw/display/ads7846.c +@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) + + ads7846_int_update(s); + +- vmstate_register(NULL, -1, &vmstate_ads7846, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); + } + + static void ads7846_class_init(ObjectClass *klass, void *data) +diff --git a/hw/i2c/core.c b/hw/i2c/core.c +index 92cd489..d770035 100644 +--- a/hw/i2c/core.c ++++ b/hw/i2c/core.c +@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) + + bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + QLIST_INIT(&bus->current_devs); +- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); + return bus; + } + +diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c +index 59892b0..e6ee5e1 100644 +--- a/hw/input/stellaris_input.c ++++ b/hw/input/stellaris_input.c +@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) + } + s->num_buttons = n; + qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); +- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_stellaris_gamepad, s); + } +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 375cb6a..f2c3a7f 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + } + + if (s->legacy_instance_id) { +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, + s, -1, 0, NULL); +diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c 
+index a713149..81ee73e 100644 +--- a/hw/misc/max111x.c ++++ b/hw/misc/max111x.c +@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) + s->input[7] = 0x80; + s->com = 0; + +- vmstate_register(dev, -1, &vmstate_max111x, s); ++ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); + return 0; + } + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index cc2dd8b..39920c6 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) + + s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); + s->vmstate->name = qemu_get_queue(s->nic)->model; +- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); ++ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); + } + + static void eepro100_instance_init(Object *obj) +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index cbc7a32..fed019d 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) + bus->machine_done.notify = pcibus_machine_done; + qemu_add_machine_init_done_notifier(&bus->machine_done); + +- vmstate_register(NULL, -1, &vmstate_pcibus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + } + + static void pcie_bus_realize(BusState *qbus, Error **errp) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 8749c72..c12862d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live("spapr/htab", -1, 1, ++ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/timer/arm_timer.c 
b/hw/timer/arm_timer.c +index af524fa..beaa285 100644 +--- a/hw/timer/arm_timer.c ++++ b/hw/timer/arm_timer.c +@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) + s->control = TIMER_CTRL_IE; + + s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); +- vmstate_register(NULL, -1, &vmstate_arm_timer, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); + return s; + } + +diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c +index 22f9113..da7b490 100644 +--- a/hw/tpm/tpm_emulator.c ++++ b/hw/tpm/tpm_emulator.c +@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) + tpm_emu->cur_locty_number = ~0; + qemu_mutex_init(&tpm_emu->mutex); + +- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_tpm_emulator, obj); + } + + /* +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index ac4f46a..883f1cf 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + + bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + ++#define VMSTATE_INSTANCE_ID_ANY -1 ++ + /* Returns: 0 on success, -1 on failure */ + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + const VMStateDescription *vmsd, +diff --git a/migration/savevm.c b/migration/savevm.c +index a71b930..e2e8e0a 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, + + pstrcat(se->idstr, sizeof(se->idstr), idstr); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + + se->compat = g_new0(CompatEntry, 1); + 
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); +- se->compat->instance_id = instance_id == -1 ? ++ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? + calculate_compat_instance_id(vmsd->name) : instance_id; +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + } + pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch new file mode 100644 index 0000000..9a36714 --- /dev/null +++ b/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch @@ -0,0 +1,42 @@ +From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:40 +0000 +Subject: [PATCH 08/18] migration: Don't send data if we have stopped + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-8-quintela@redhat.com> +Patchwork-id: 94114 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we do a cancel, we got out without one error, but we can't do the +rest of the output as in a normal situation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index a0257ee..902c56c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- if (ret >= 0) { ++ if (ret >= 0 ++ && migration_is_setup_or_active(migrate_get_current()->state)) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch new file mode 100644 index 0000000..01cb0f1 --- /dev/null +++ b/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch @@ -0,0 +1,94 @@ +From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:35 +0000 +Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case + of error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-3-quintela@redhat.com> +Patchwork-id: 94113 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we are exiting due to an error/finish/.... Just don't try to even +touch the channel with one IO operation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Juan Quintela +(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 65580e3..8c783b3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -899,6 +899,12 @@ struct { + uint64_t packet_num; + /* send channels ready */ + QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. ++ */ ++ int exiting; + } *multifd_send_state; + + /* +@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) + MultiFDPages_t *pages = multifd_send_state->pages; + uint64_t transferred; + ++ if (atomic_read(&multifd_send_state->exiting)) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + p = &multifd_send_state->params[i]; +@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) + } + } + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. 
++ */ ++ if (atomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) + + while (true) { + qemu_sem_wait(&p->sem); ++ ++ if (atomic_read(&multifd_send_state->exiting)) { ++ break; ++ } + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); + qemu_sem_init(&multifd_send_state->channels_ready, 0); ++ atomic_set(&multifd_send_state->exiting, 0); + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch new file mode 100644 index 0000000..4a7fb28 --- /dev/null +++ b/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch @@ -0,0 +1,70 @@ +From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:43 +0000 +Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is + cancelled + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-11-quintela@redhat.com> +Patchwork-id: 94120 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the migration is cancelled when it is in the completion phase, +the migration state is set to MIGRATION_STATUS_CANCELLING. +The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause +function, so that VM always is paused. 
+ +Reported-by: Euler Robot +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index eb50d77..ed18c59 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, + /* This block intentionally left blank */ + } + +- qemu_mutex_unlock_iothread(); +- migrate_set_state(&s->state, *current_active_state, +- MIGRATION_STATUS_PRE_SWITCHOVER); +- qemu_sem_wait(&s->pause_sem); +- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, +- new_state); +- *current_active_state = new_state; +- qemu_mutex_lock_iothread(); ++ /* ++ * If the migration is cancelled when it is in the completion phase, ++ * the migration state is set to MIGRATION_STATUS_CANCELLING. ++ * So we don't need to wait a semaphore, otherwise we would always ++ * wait for the 'pause_sem' semaphore. ++ */ ++ if (s->state != MIGRATION_STATUS_CANCELLING) { ++ qemu_mutex_unlock_iothread(); ++ migrate_set_state(&s->state, *current_active_state, ++ MIGRATION_STATUS_PRE_SWITCHOVER); ++ qemu_sem_wait(&s->pause_sem); ++ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, ++ new_state); ++ *current_active_state = new_state; ++ qemu_mutex_lock_iothread(); ++ } + + return s->state == new_state ? 
0 : -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch b/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch new file mode 100644 index 0000000..2d3d519 --- /dev/null +++ b/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch @@ -0,0 +1,172 @@ +From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 17 Mar 2020 17:05:18 +0000 +Subject: [PATCH 1/2] migration: Rate limit inside host pages + +RH-Author: Laurent Vivier +Message-id: <20200317170518.9303-1-lvivier@redhat.com> +Patchwork-id: 94374 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages +Bugzilla: 1814336 +RH-Acked-by: Peter Xu +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +From: "Dr. David Alan Gilbert" + +When using hugepages, rate limiting is necessary within each huge +page, since a 1G huge page can take a significant time to send, so +you end up with bursty behaviour. + +Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") +Reported-by: Lin Ma +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 +BRANCH: rhel-av-8.2.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 +TESTED: Tested that the migration abort doesn't trigger an error message in + the kernel logs on P9 + +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- + migration/migration.h | 1 + + migration/ram.c | 2 ++ + migration/trace-events | 4 ++-- + 4 files changed, 37 insertions(+), 27 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ed18c59..e31d0f5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) + qemu_sem_wait(&migrate_get_current()->rate_limit_sem); + } + ++/* Returns true if the rate limiting was broken by an urgent request */ ++bool migration_rate_limit(void) ++{ ++ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ MigrationState *s = migrate_get_current(); ++ ++ bool urgent = false; ++ migration_update_counters(s, now); ++ if (qemu_file_rate_limit(s->to_dst_file)) { ++ /* ++ * Wait for a delay to do rate limiting OR ++ * something urgent to post the semaphore. ++ */ ++ int ms = s->iteration_start_time + BUFFER_DELAY - now; ++ trace_migration_rate_limit_pre(ms); ++ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { ++ /* ++ * We were woken by one or more urgent things but ++ * the timedwait will have consumed one of them. ++ * The service routine for the urgent wake will dec ++ * the semaphore itself for each item it consumes, ++ * so add this one we just eat back. ++ */ ++ qemu_sem_post(&s->rate_limit_sem); ++ urgent = true; ++ } ++ trace_migration_rate_limit_post(urgent); ++ } ++ return urgent; ++} ++ + /* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. 
+@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) + trace_migration_thread_setup_complete(); + + while (migration_is_active(s)) { +- int64_t current_time; +- + if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { + MigIterateState iter_state = migration_iteration_run(s); + if (iter_state == MIG_ITERATE_SKIP) { +@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) + update_iteration_initial_status(s); + } + +- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- +- migration_update_counters(s, current_time); +- +- urgent = false; +- if (qemu_file_rate_limit(s->to_dst_file)) { +- /* Wait for a delay to do rate limiting OR +- * something urgent to post the semaphore. +- */ +- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; +- trace_migration_thread_ratelimit_pre(ms); +- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { +- /* We were worken by one or more urgent things but +- * the timedwait will have consumed one of them. +- * The service routine for the urgent wake will dec +- * the semaphore itself for each item it consumes, +- * so add this one we just eat back. 
+- */ +- qemu_sem_post(&s->rate_limit_sem); +- urgent = true; +- } +- trace_migration_thread_ratelimit_post(urgent); +- } ++ urgent = migration_rate_limit(); + } + + trace_migration_thread_after_loop(); +diff --git a/migration/migration.h b/migration/migration.h +index a2b2336..a15e8d8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; + + void migration_make_urgent_request(void); + void migration_consume_urgent_request(void); ++bool migration_rate_limit(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 3891eff..5344c7d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + + pages += tmppages; + pss->page++; ++ /* Allow rate limiting to happen in the middle of huge pages */ ++ migration_rate_limit(); + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +diff --git a/migration/trace-events b/migration/trace-events +index 6dee7b5..2f9129e 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 + migration_completion_file_err(void) "" + migration_completion_postcopy_end(void) "" + migration_completion_postcopy_end_after_complete(void) "" ++migration_rate_limit_pre(int ms) "%d ms" ++migration_rate_limit_post(int urgent) "urgent: %d" + migration_return_path_end_before(void) "" + migration_return_path_end_after(int rp_error) "%d" + migration_thread_after_loop(void) "" + migration_thread_file_err(void) "" +-migration_thread_ratelimit_pre(int ms) "%d ms" +-migration_thread_ratelimit_post(int urgent) "urgent: %d" + migration_thread_setup_complete(void) "" + open_return_path_on_source(void) "" + open_return_path_on_source_continue(void) "" +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch new file mode 100644 index 0000000..5fa7fde --- /dev/null +++ b/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch @@ -0,0 +1,65 @@ +From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:34 +0000 +Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-2-quintela@redhat.com> +Patchwork-id: 94112 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Wei Yang + +This is a preparation for the next patch: + + not use multifd during postcopy. + +Without enabling postcopy, everything looks good. While after enabling +postcopy, migration may fail even not use multifd during postcopy. The +reason is the pages is not properly cleared and *old* target page will +continue to be transferred. + +After clean pages, migration succeeds. + +Signed-off-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 5078f94..65580e3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) + } + qemu_mutex_unlock(&p->mutex); + } +- p->pages->used = 0; ++ assert(!p->pages->used); ++ assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; +- p->pages->block = NULL; + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; +@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; ++ p->pages->used = 0; ++ p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, used, flags, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch new file mode 100644 index 0000000..0c5fe80 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch @@ -0,0 +1,77 @@ +From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:37 +0000 +Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in + terminating multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-5-quintela@redhat.com> +Patchwork-id: 94119 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd will lock all the other multifds' IOChannel mutex to inform them +to quit by setting p->quit or shutting down p->c. 
In this scenario, if some +multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup +had destroyed their mutex, it could cause destroyed mutex access when trying +lock their mutex. + +Here is the coredump stack: + #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 + #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 + #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 + #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 + #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 + #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 + #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 + #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 + #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 + #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 + +To fix it up, let's destroy the mutex after all the other multifd threads had +been terminated. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) +Signed-off-by: Danilo C. L.
de Paula +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 860f781..6c55c5d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) + if (p->running) { + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ + socket_send_channel_destroy(p->c); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) + qemu_sem_post(&p->sem_sync); + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDRecvParams *p = &multifd_recv_state->params[i]; ++ + object_unref(OBJECT(p->c)); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch new file mode 100644 index 0000000..9e9683c --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch @@ -0,0 +1,75 @@ +From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:42 +0000 +Subject: [PATCH 10/18] migration/multifd: fix nullptr access in + multifd_send_terminate_threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-10-quintela@redhat.com> +Patchwork-id: 94117 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the multifd_send_threads is not created when migration is failed, +multifd_save_cleanup would be called twice. 
In this scenario, the +multifd_send_state is accessed after it has been released, the result +is that the source VM is crashing down. + +Here is the coredump stack: + Program received signal SIGSEGV, Segmentation fault. + 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; + #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 + #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 + #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 + #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 + #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 + #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 + #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 + #10 0x00005629334c5acf in main_loop () at vl.c:1810 + #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 + +If the multifd_send_threads is not created when migration is failed. +In this scenario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. + +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) +Signed-off-by: Danilo C. L.
de Paula +--- + migration/ram.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 902c56c..3891eff 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + trace_multifd_new_send_channel_async(p->id); + if (qio_task_propagate_error(task, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); +- multifd_save_cleanup(); ++ /* Error happen, we need to tell who pay attention to me */ ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ /* ++ * Although multifd_send_thread is not created, but main migration ++ * thread neet to judge whether it is running, so we need to mark ++ * its status. ++ */ ++ p->quit = true; + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch new file mode 100644 index 0000000..e780698 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch @@ -0,0 +1,68 @@ +From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:36 +0000 +Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating + multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-4-quintela@redhat.com> +Patchwork-id: 94110 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd channel will shutdown all the other multifd's IOChannel when it +fails to receive an IOChannel. 
In this scenario, if some multifds had not +received its IOChannel yet, it would try to shutdown its IOChannel which could +cause nullptr access at qio_channel_shutdown. + +Here is the coredump stack: + #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 + #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 + #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 + #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 + #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 + #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 + #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 + #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 + #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 + #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 + #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 + #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 + #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 + +To fix it up, let's check p->c before calling qio_channel_shutdown. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) +Signed-off-by: Danilo C. L.
de Paula +--- + migration/ram.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 8c783b3..860f781 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) + - normal quit, i.e. everything went fine, just finished + - error quit: We close the channels so the channel threads + finish the qio_channel_read_all_eof() */ +- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ if (p->c) { ++ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ } + qemu_mutex_unlock(&p->mutex); + } + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..c20cb6c --- /dev/null +++ b/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,123 @@ +From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:10 +0000 +Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself + +RH-Author: Kevin Wolf +Message-id: <20200224165710.4830-3-kwolf@redhat.com> +Patchwork-id: 94045 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, when mirror_co_read() waits for free slots, its +MirrorOp is already in s->ops_in_flight, so if not enough slots are +immediately available, an operation can end up waiting for itself to +complete, which results in a hang. + +Fix this by passing the current MirrorOp and skipping this operation +when picking an operation to wait for. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..cacbc70 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { ++ if (self == op) { ++ continue; ++ } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, false); ++ mirror_wait_for_any_operation(s, self, false); + } + + /* Perform a mirror copy operation. 
+@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, op); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, pseudo_op); + } + + if (s->ret < 0) { +@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + } + } + +@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } + +@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, true); ++ mirror_wait_for_any_operation(s, NULL, true); + } + + if (s->ret < 0) { +@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch b/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch new file mode 100644 index 0000000..09d1152 --- /dev/null +++ b/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch @@ -0,0 +1,89 @@ +From 98bf67db979927a5c7bbdc4a17c35d60b5f38e71 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:24 +0100 +Subject: [PATCH 25/26] mirror: Make sure that source and target size match + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-11-kwolf@redhat.com> +Patchwork-id: 97110 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 10/11] mirror: Make sure that source and target size match +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +If the target is shorter than the source, mirror would copy data until +it reaches the end of the target and then fail with an I/O error when +trying to write past the end. + +If the target is longer than the source, the mirror job would complete +successfully, but the target wouldn't actually be an accurate copy of +the source image (it would contain some additional garbage at the end). + +Fix this by checking that both images have the same size when the job +starts. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Message-Id: <20200511135825.219437-4-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit e83dd6808c6e0975970f37b49b27cc37bb54eea8) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 5e5a521..0d32fca 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -859,6 +859,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + BlockDriverState *target_bs = blk_bs(s->target); + bool need_drain = true; + int64_t length; ++ int64_t target_length; + BlockDriverInfo bdi; + char backing_filename[2]; /* we only need 2 characters because we are only + checking for a NULL string */ +@@ -874,24 +875,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + goto immediate_exit; + } + ++ target_length = blk_getlength(s->target); ++ if (target_length < 0) { ++ ret = target_length; ++ goto immediate_exit; ++ } ++ + /* Active commit must resize the base image if its size differs from the + * active layer. */ + if (s->base == blk_bs(s->target)) { +- int64_t base_length; +- +- base_length = blk_getlength(s->target); +- if (base_length < 0) { +- ret = base_length; +- goto immediate_exit; +- } +- +- if (s->bdev_length > base_length) { ++ if (s->bdev_length > target_length) { + ret = blk_truncate(s->target, s->bdev_length, false, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + goto immediate_exit; + } + } ++ } else if (s->bdev_length != target_length) { ++ error_setg(errp, "Source and target image have different sizes"); ++ ret = -EINVAL; ++ goto immediate_exit; + } + + if (s->bdev_length == 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch new file mode 100644 index 0000000..67f3e54 --- /dev/null +++ b/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch @@ -0,0 +1,51 @@ +From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:09 +0000 +Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability + +RH-Author: 
Kevin Wolf +Message-id: <20200224165710.4830-2-kwolf@redhat.com> +Patchwork-id: 94044 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +If a coroutine is launched, but the coroutine pointer isn't stored +anywhere, debugging any problems inside the coroutine is quite hard. +Let's store the coroutine pointer of a mirror operation in MirrorOp to +have it available in the debugger. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/mirror.c b/block/mirror.c +index f0f2d9d..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -103,6 +103,7 @@ struct MirrorOp { + bool is_pseudo_op; + bool is_active_write; + CoQueue waiting_requests; ++ Coroutine *co; + + QTAILQ_ENTRY(MirrorOp) next; + }; +@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + default: + abort(); + } ++ op->co = co; + + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + qemu_coroutine_enter(co); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch b/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch new file mode 100644 index 0000000..a06d30e --- /dev/null +++ b/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch @@ -0,0 +1,95 @@ +From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:24 +0100 +Subject: [PATCH 4/4] mirror: Wait only for in-flight operations + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-3-kwolf@redhat.com> +Patchwork-id: 94463 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky 
+RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, a MirrorOp is already in s->ops_in_flight when +mirror_co_read() waits for free slots, so if not enough slots are +immediately available, an operation can end up waiting for itself, or +two or more operations can wait for each other to complete, which +results in a hang. + +Fix this by adding a flag to MirrorOp that tells us if the request is +already in flight (and therefore occupies slots that it will later +free), and picking only such operations for waiting. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..5e5a521 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -102,6 +102,7 @@ struct MirrorOp { + + bool is_pseudo_op; + bool is_active_write; ++ bool is_in_flight; + CoQueue waiting_requests; + Coroutine *co; + +@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ +- if (!op->is_pseudo_op && op->is_active_write == active) { ++ if (!op->is_pseudo_op && op->is_in_flight && ++ op->is_active_write == active) ++ { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + /* Copy the dirty cluster. 
*/ + s->in_flight++; + s->bytes_in_flight += op->bytes; ++ op->is_in_flight = true; + trace_mirror_one_iteration(s, op->offset, op->bytes); + + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, +@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); +@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, ++ .is_in_flight = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch new file mode 100644 index 0000000..bca0b4c --- /dev/null +++ b/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch @@ -0,0 +1,74 @@ +From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:38 +0000 +Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an + error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-6-quintela@redhat.com> +Patchwork-id: 94118 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. 
David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) +Signed-off-by: Danilo C. L. de Paula +--- + migration/ram.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 6c55c5d..a0257ee 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + { + RAMState **temp = opaque; + RAMState *rs = *temp; +- int ret; ++ int ret = 0; + int i; + int64_t t0; + int done = 0; +@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); +- ram_counters.transferred += 8; ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ram_counters.transferred += 8; + +- ret = qemu_file_get_error(f); ++ ret = qemu_file_get_error(f); ++ } + if (ret < 0) { + return ret; + } +@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ } + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch b/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch new file mode 100644 index 0000000..94d2c98 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch @@ -0,0 +1,161 @@ +From f49ff2ed5675f1d0cddc404842e9d6e4e572d5a7 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 10 Jun 2020 18:32:01 -0400 +Subject: [PATCH 1/2] nbd/server: Avoid 
long error message assertions + CVE-2020-10761 + +RH-Author: Eric Blake +Message-id: <20200610183202.3780750-2-eblake@redhat.com> +Patchwork-id: 97494 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] nbd/server: Avoid long error message assertions CVE-2020-10761 +Bugzilla: 1845384 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Ever since commit 36683283 (v2.8), the server code asserts that error +strings sent to the client are well-formed per the protocol by not +exceeding the maximum string length of 4096. At the time the server +first started sending error messages, the assertion could not be +triggered, because messages were completely under our control. +However, over the years, we have added latent scenarios where a client +could trigger the server to attempt an error message that would +include the client's information if it passed other checks first: + +- requesting NBD_OPT_INFO/GO on an export name that is not present + (commit 0cfae925 in v2.12 echoes the name) + +- requesting NBD_OPT_LIST/SET_META_CONTEXT on an export name that is + not present (commit e7b1948d in v2.12 echoes the name) + +At the time, those were still safe because we flagged names larger +than 256 bytes with a different message; but that changed in commit +93676c88 (v4.2) when we raised the name limit to 4096 to match the NBD +string limit. (That commit also failed to change the magic number +4096 in nbd_negotiate_send_rep_err to the just-introduced named +constant.) So with that commit, long client names appended to server +text can now trigger the assertion, and thus be used as a denial of +service attack against a server. As a mitigating factor, if the +server requires TLS, the client cannot trigger the problematic paths +unless it first supplies TLS credentials, and such trusted clients are +less likely to try to intentionally crash the server. 
+ +We may later want to further sanitize the user-supplied strings we +place into our error messages, such as scrubbing out control +characters, but that is less important to the CVE fix, so it can be a +later patch to the new nbd_sanitize_name. + +Consideration was given to changing the assertion in +nbd_negotiate_send_rep_verr to instead merely log a server error and +truncate the message, to avoid leaving a latent path that could +trigger a future CVE DoS on any new error message. However, this +merely complicates the code for something that is already (correctly) +flagging coding errors, and now that we are aware of the long message +pitfall, we are less likely to introduce such errors in the future, +which would make such error handling dead code. + +Reported-by: Xueqiang Wei +CC: qemu-stable@nongnu.org +Fixes: https://bugzilla.redhat.com/1843684 CVE-2020-10761 +Fixes: 93676c88d7 +Signed-off-by: Eric Blake +Message-Id: <20200610163741.3745251-2-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5c4fe018c025740fef4a0a4421e8162db0c3eefd) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + nbd/server.c | 23 ++++++++++++++++++++--- + tests/qemu-iotests/143 | 4 ++++ + tests/qemu-iotests/143.out | 2 ++ + 3 files changed, 26 insertions(+), 3 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 24ebc1a805..d5b9df092c 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -217,7 +217,7 @@ nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, + + msg = g_strdup_vprintf(fmt, va); + len = strlen(msg); +- assert(len < 4096); ++ assert(len < NBD_MAX_STRING_SIZE); + trace_nbd_negotiate_send_rep_err(msg); + ret = nbd_negotiate_send_rep_len(client, type, len, errp); + if (ret < 0) { +@@ -231,6 +231,19 @@ nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, + return 0; + } + ++/* ++ * Return a malloc'd copy of @name suitable for use in an error reply. 
++ */ ++static char * ++nbd_sanitize_name(const char *name) ++{ ++ if (strnlen(name, 80) < 80) { ++ return g_strdup(name); ++ } ++ /* XXX Should we also try to sanitize any control characters? */ ++ return g_strdup_printf("%.80s...", name); ++} ++ + /* Send an error reply. + * Return -errno on error, 0 on success. */ + static int GCC_FMT_ATTR(4, 5) +@@ -595,9 +608,11 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) + + exp = nbd_export_find(name); + if (!exp) { ++ g_autofree char *sane_name = nbd_sanitize_name(name); ++ + return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, + errp, "export '%s' not present", +- name); ++ sane_name); + } + + /* Don't bother sending NBD_INFO_NAME unless client requested it */ +@@ -995,8 +1010,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client, + + meta->exp = nbd_export_find(export_name); + if (meta->exp == NULL) { ++ g_autofree char *sane_name = nbd_sanitize_name(export_name); ++ + return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, +- "export '%s' not present", export_name); ++ "export '%s' not present", sane_name); + } + + ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); +diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 +index f649b36195..d2349903b1 100755 +--- a/tests/qemu-iotests/143 ++++ b/tests/qemu-iotests/143 +@@ -58,6 +58,10 @@ _send_qemu_cmd $QEMU_HANDLE \ + $QEMU_IO_PROG -f raw -c quit \ + "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ + | _filter_qemu_io | _filter_nbd ++# Likewise, with longest possible name permitted in NBD protocol ++$QEMU_IO_PROG -f raw -c quit \ ++ "nbd+unix:///$(printf %4096d 1 | tr ' ' a)?socket=$SOCK_DIR/nbd" 2>&1 \ ++ | _filter_qemu_io | _filter_nbd | sed 's/aaaa*aa/aa--aa/' + + _send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'quit' }" \ +diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out +index 1f4001c601..fc9c0a761f 100644 +--- a/tests/qemu-iotests/143.out ++++ b/tests/qemu-iotests/143.out 
+@@ -5,6 +5,8 @@ QA output created by 143 + {"return": {}} + qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'no_such_export' not present ++qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available ++server reported: export 'aa--aa...' not present + { 'execute': 'quit' } + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-- +2.27.0 + diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch new file mode 100644 index 0000000..6d9382c --- /dev/null +++ b/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch @@ -0,0 +1,318 @@ +From 70f8bbb27f9f357ea83ff6639fc00aa60fc902b9 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:47 +0100 +Subject: [PATCH 04/12] numa: Extend CLI to provide initiator information for + numa nodes + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-4-plai@redhat.com> +Patchwork-id: 96736 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 03/11] numa: Extend CLI to provide initiator information for numa nodes +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Tao Xu + +In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT), +The initiator represents processor which access to memory. And in 5.2.27.3 +Memory Proximity Domain Attributes Structure, the attached initiator is +defined as where the memory controller responsible for a memory proximity +domain. With attached initiator information, the topology of heterogeneous +memory can be described. Add new machine property 'hmat' to enable all +HMAT specific options. + +Extend CLI of "-numa node" option to indicate the initiator numa node-id. 
+In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report +the platform's HMAT tables. Before using initiator option, enable HMAT with +-machine hmat=on. + +Acked-by: Markus Armbruster +Reviewed-by: Igor Mammedov +Reviewed-by: Jingqi Liu +Suggested-by: Dan Williams +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-2-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 244b3f4485a07c7ce4b7123d6ce9d8c6012756e8) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/machine.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/core/numa.c | 23 ++++++++++++++++++ + include/sysemu/numa.h | 5 ++++ + qapi/machine.json | 10 +++++++- + qemu-options.hx | 35 ++++++++++++++++++++++++---- + 5 files changed, 131 insertions(+), 6 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 19c78c6..cb21ae1 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -688,6 +688,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp) + ms->nvdimms_state->is_enabled = value; + } + ++static bool machine_get_hmat(Object *obj, Error **errp) ++{ ++ MachineState *ms = MACHINE(obj); ++ ++ return ms->numa_state->hmat_enabled; ++} ++ ++static void machine_set_hmat(Object *obj, bool value, Error **errp) ++{ ++ MachineState *ms = MACHINE(obj); ++ ++ ms->numa_state->hmat_enabled = value; ++} ++ + static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) + { + MachineState *ms = MACHINE(obj); +@@ -815,6 +829,7 @@ void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, Error **errp) + { + MachineClass *mc = MACHINE_GET_CLASS(machine); ++ NodeInfo *numa_info = machine->numa_state->nodes; + bool match = false; + int i; + +@@ -884,6 +899,17 @@ void machine_set_cpu_numa_node(MachineState *machine, + match = true; + slot->props.node_id = props->node_id; + slot->props.has_node_id = props->has_node_id; 
++ ++ if (machine->numa_state->hmat_enabled) { ++ if ((numa_info[props->node_id].initiator < MAX_NODES) && ++ (props->node_id != numa_info[props->node_id].initiator)) { ++ error_setg(errp, "The initiator of CPU NUMA node %" PRId64 ++ " should be itself", props->node_id); ++ return; ++ } ++ numa_info[props->node_id].has_cpu = true; ++ numa_info[props->node_id].initiator = props->node_id; ++ } + } + + if (!match) { +@@ -1130,6 +1156,13 @@ static void machine_initfn(Object *obj) + + if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { + ms->numa_state = g_new0(NumaState, 1); ++ object_property_add_bool(obj, "hmat", ++ machine_get_hmat, machine_set_hmat, ++ &error_abort); ++ object_property_set_description(obj, "hmat", ++ "Set on/off to enable/disable " ++ "ACPI Heterogeneous Memory Attribute " ++ "Table (HMAT)", NULL); + } + + /* Register notifier when init is done for sysbus sanity checks */ +@@ -1218,6 +1251,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) + return g_string_free(s, false); + } + ++static void numa_validate_initiator(NumaState *numa_state) ++{ ++ int i; ++ NodeInfo *numa_info = numa_state->nodes; ++ ++ for (i = 0; i < numa_state->num_nodes; i++) { ++ if (numa_info[i].initiator == MAX_NODES) { ++ error_report("The initiator of NUMA node %d is missing, use " ++ "'-numa node,initiator' option to declare it", i); ++ exit(1); ++ } ++ ++ if (!numa_info[numa_info[i].initiator].present) { ++ error_report("NUMA node %" PRIu16 " is missing, use " ++ "'-numa node' option to declare it first", ++ numa_info[i].initiator); ++ exit(1); ++ } ++ ++ if (!numa_info[numa_info[i].initiator].has_cpu) { ++ error_report("The initiator of NUMA node %d is invalid", i); ++ exit(1); ++ } ++ } ++} ++ + static void machine_numa_finish_cpu_init(MachineState *machine) + { + int i; +@@ -1258,6 +1317,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine) + machine_set_cpu_numa_node(machine, &props, &error_fatal); + } + } ++ ++ if 
(machine->numa_state->hmat_enabled) { ++ numa_validate_initiator(machine->numa_state); ++ } ++ + if (s->len && !qtest_enabled()) { + warn_report("CPU(s) not present in any NUMA nodes: %s", + s->str); +diff --git a/hw/core/numa.c b/hw/core/numa.c +index 19f082d..a07eef9 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -129,6 +129,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, + numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL); + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); + } ++ ++ /* ++ * If not set the initiator, set it to MAX_NODES. And if ++ * HMAT is enabled and this node has no cpus, QEMU will raise error. ++ */ ++ numa_info[nodenr].initiator = MAX_NODES; ++ if (node->has_initiator) { ++ if (!ms->numa_state->hmat_enabled) { ++ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " ++ "(HMAT) is disabled, enable it with -machine hmat=on " ++ "before using any of hmat specific options"); ++ return; ++ } ++ ++ if (node->initiator >= MAX_NODES) { ++ error_report("The initiator id %" PRIu16 " expects an integer " ++ "between 0 and %d", node->initiator, ++ MAX_NODES - 1); ++ return; ++ } ++ ++ numa_info[nodenr].initiator = node->initiator; ++ } + numa_info[nodenr].present = true; + max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); + ms->numa_state->num_nodes++; +diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h +index ae9c41d..788cbec 100644 +--- a/include/sysemu/numa.h ++++ b/include/sysemu/numa.h +@@ -18,6 +18,8 @@ struct NodeInfo { + uint64_t node_mem; + struct HostMemoryBackend *node_memdev; + bool present; ++ bool has_cpu; ++ uint16_t initiator; + uint8_t distance[MAX_NODES]; + }; + +@@ -33,6 +35,9 @@ struct NumaState { + /* Allow setting NUMA distance for different NUMA nodes */ + bool have_numa_distance; + ++ /* Detect if HMAT support is enabled. 
*/ ++ bool hmat_enabled; ++ + /* NUMA nodes information */ + NodeInfo nodes[MAX_NODES]; + }; +diff --git a/qapi/machine.json b/qapi/machine.json +index ca26779..27d0e37 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -463,6 +463,13 @@ + # @memdev: memory backend object. If specified for one node, + # it must be specified for all nodes. + # ++# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145, ++# points to the nodeid which has the memory controller ++# responsible for this NUMA node. This field provides ++# additional information as to the initiator node that ++# is closest (as in directly attached) to this node, and ++# therefore has the best performance (since 5.0) ++# + # Since: 2.1 + ## + { 'struct': 'NumaNodeOptions', +@@ -470,7 +477,8 @@ + '*nodeid': 'uint16', + '*cpus': ['uint16'], + '*mem': 'size', +- '*memdev': 'str' }} ++ '*memdev': 'str', ++ '*initiator': 'uint16' }} + + ## + # @NumaDistOptions: +diff --git a/qemu-options.hx b/qemu-options.hx +index df1d27b..e2ce754 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -43,7 +43,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ + " suppress-vmdesc=on|off disables self-describing migration (default=off)\n" + " nvdimm=on|off controls NVDIMM support (default=off)\n" + " enforce-config-section=on|off enforce configuration section migration (default=off)\n" +- " memory-encryption=@var{} memory encryption object to use (default=none)\n", ++ " memory-encryption=@var{} memory encryption object to use (default=none)\n" ++ " hmat=on|off controls ACPI HMAT support (default=off)\n", + QEMU_ARCH_ALL) + STEXI + @item -machine [type=]@var{name}[,prop=@var{value}[,...]] +@@ -103,6 +104,9 @@ NOTE: this parameter is deprecated. Please use @option{-global} + @option{migration.send-configuration}=@var{on|off} instead. + @item memory-encryption=@var{} + Memory encryption object to use. The default is none. 
++@item hmat=on|off ++Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support. ++The default is off. + @end table + ETEXI + +@@ -161,14 +165,14 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi + ETEXI + + DEF("numa", HAS_ARG, QEMU_OPTION_numa, +- "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" +- "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" ++ "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" ++ "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" + "-numa dist,src=source,dst=destination,val=distance\n" + "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n", + QEMU_ARCH_ALL) + STEXI +-@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] +-@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] ++@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] ++@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] + @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} + @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] + @findex -numa +@@ -215,6 +219,27 @@ split equally between them. + @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, + if one node uses @samp{memdev}, all of them have to use it. + ++@samp{initiator} is an additional option that points to an @var{initiator} ++NUMA node that has best performance (the lowest latency or largest bandwidth) ++to this NUMA @var{node}. Note that this option can be set only when ++the machine property 'hmat' is set to 'on'. ++ ++Following example creates a machine with 2 NUMA nodes, node 0 has CPU. ++node 1 has only memory, and its initiator is node 0. 
Note that because ++node 0 has CPU, by default the initiator of node 0 is itself and must be ++itself. ++@example ++-machine hmat=on \ ++-m 2G,slots=2,maxmem=4G \ ++-object memory-backend-ram,size=1G,id=m0 \ ++-object memory-backend-ram,size=1G,id=m1 \ ++-numa node,nodeid=0,memdev=m0 \ ++-numa node,nodeid=1,memdev=m1,initiator=0 \ ++-smp 2,sockets=2,maxcpus=2 \ ++-numa cpu,node-id=0,socket-id=0 \ ++-numa cpu,node-id=0,socket-id=1 ++@end example ++ + @var{source} and @var{destination} are NUMA node IDs. + @var{distance} is the NUMA distance from @var{source} to @var{destination}. + The distance from a node to itself is always 10. If any pair of nodes is +-- +1.8.3.1 + diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch new file mode 100644 index 0000000..306abeb --- /dev/null +++ b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch @@ -0,0 +1,545 @@ +From 32341d8cf680625def040b44d70b197f2399bbdb Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:48 +0100 +Subject: [PATCH 05/12] numa: Extend CLI to provide memory latency and + bandwidth information + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-5-plai@redhat.com> +Patchwork-id: 96731 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 04/11] numa: Extend CLI to provide memory latency and bandwidth information +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Liu Jingqi + +Add -numa hmat-lb option to provide System Locality Latency and +Bandwidth Information. These memory attributes help to build +System Locality Latency and Bandwidth Information Structure(s) +in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using +hmat-lb option, enable HMAT with -machine hmat=on. 
+ +Acked-by: Markus Armbruster +Signed-off-by: Liu Jingqi +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-3-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +(cherry picked from commit 9b12dfa03a94d7f7a4b54eb67229a31e58193384) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/numa.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++ + include/sysemu/numa.h | 53 ++++++++++++++ + qapi/machine.json | 93 +++++++++++++++++++++++- + qemu-options.hx | 47 +++++++++++- + 4 files changed, 384 insertions(+), 3 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index a07eef9..58fe713 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "sysemu/hostmem.h" + #include "sysemu/numa.h" + #include "sysemu/sysemu.h" +@@ -194,6 +195,186 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp) + ms->numa_state->have_numa_distance = true; + } + ++void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, ++ Error **errp) ++{ ++ int i, first_bit, last_bit; ++ uint64_t max_entry, temp_base, bitmap_copy; ++ NodeInfo *numa_info = numa_state->nodes; ++ HMAT_LB_Info *hmat_lb = ++ numa_state->hmat_lb[node->hierarchy][node->data_type]; ++ HMAT_LB_Data lb_data = {}; ++ HMAT_LB_Data *lb_temp; ++ ++ /* Error checking */ ++ if (node->initiator > numa_state->num_nodes) { ++ error_setg(errp, "Invalid initiator=%d, it should be less than %d", ++ node->initiator, numa_state->num_nodes); ++ return; ++ } ++ if (node->target > numa_state->num_nodes) { ++ error_setg(errp, "Invalid target=%d, it should be less than %d", ++ node->target, numa_state->num_nodes); ++ return; ++ } ++ if (!numa_info[node->initiator].has_cpu) { ++ error_setg(errp, "Invalid initiator=%d, it isn't an " ++ "initiator proximity domain", node->initiator); ++ return; ++ } ++ if 
(!numa_info[node->target].present) { ++ error_setg(errp, "The target=%d should point to an existing node", ++ node->target); ++ return; ++ } ++ ++ if (!hmat_lb) { ++ hmat_lb = g_malloc0(sizeof(*hmat_lb)); ++ numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb; ++ hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data)); ++ } ++ hmat_lb->hierarchy = node->hierarchy; ++ hmat_lb->data_type = node->data_type; ++ lb_data.initiator = node->initiator; ++ lb_data.target = node->target; ++ ++ if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) { ++ /* Input latency data */ ++ ++ if (!node->has_latency) { ++ error_setg(errp, "Missing 'latency' option"); ++ return; ++ } ++ if (node->has_bandwidth) { ++ error_setg(errp, "Invalid option 'bandwidth' since " ++ "the data type is latency"); ++ return; ++ } ++ ++ /* Detect duplicate configuration */ ++ for (i = 0; i < hmat_lb->list->len; i++) { ++ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); ++ ++ if (node->initiator == lb_temp->initiator && ++ node->target == lb_temp->target) { ++ error_setg(errp, "Duplicate configuration of the latency for " ++ "initiator=%d and target=%d", node->initiator, ++ node->target); ++ return; ++ } ++ } ++ ++ hmat_lb->base = hmat_lb->base ? 
hmat_lb->base : UINT64_MAX; ++ ++ if (node->latency) { ++ /* Calculate the temporary base and compressed latency */ ++ max_entry = node->latency; ++ temp_base = 1; ++ while (QEMU_IS_ALIGNED(max_entry, 10)) { ++ max_entry /= 10; ++ temp_base *= 10; ++ } ++ ++ /* Calculate the max compressed latency */ ++ temp_base = MIN(hmat_lb->base, temp_base); ++ max_entry = node->latency / hmat_lb->base; ++ max_entry = MAX(hmat_lb->range_bitmap, max_entry); ++ ++ /* ++ * For latency hmat_lb->range_bitmap record the max compressed ++ * latency which should be less than 0xFFFF (UINT16_MAX) ++ */ ++ if (max_entry >= UINT16_MAX) { ++ error_setg(errp, "Latency %" PRIu64 " between initiator=%d and " ++ "target=%d should not differ from previously entered " ++ "min or max values on more than %d", node->latency, ++ node->initiator, node->target, UINT16_MAX - 1); ++ return; ++ } else { ++ hmat_lb->base = temp_base; ++ hmat_lb->range_bitmap = max_entry; ++ } ++ ++ /* ++ * Set lb_info_provided bit 0 as 1, ++ * latency information is provided ++ */ ++ numa_info[node->target].lb_info_provided |= BIT(0); ++ } ++ lb_data.data = node->latency; ++ } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) { ++ /* Input bandwidth data */ ++ if (!node->has_bandwidth) { ++ error_setg(errp, "Missing 'bandwidth' option"); ++ return; ++ } ++ if (node->has_latency) { ++ error_setg(errp, "Invalid option 'latency' since " ++ "the data type is bandwidth"); ++ return; ++ } ++ if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) { ++ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and " ++ "target=%d should be 1MB aligned", node->bandwidth, ++ node->initiator, node->target); ++ return; ++ } ++ ++ /* Detect duplicate configuration */ ++ for (i = 0; i < hmat_lb->list->len; i++) { ++ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); ++ ++ if (node->initiator == lb_temp->initiator && ++ node->target == lb_temp->target) { ++ error_setg(errp, "Duplicate configuration of the bandwidth for " ++ 
"initiator=%d and target=%d", node->initiator, ++ node->target); ++ return; ++ } ++ } ++ ++ hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1; ++ ++ if (node->bandwidth) { ++ /* Keep bitmap unchanged when bandwidth out of range */ ++ bitmap_copy = hmat_lb->range_bitmap; ++ bitmap_copy |= node->bandwidth; ++ first_bit = ctz64(bitmap_copy); ++ temp_base = UINT64_C(1) << first_bit; ++ max_entry = node->bandwidth / temp_base; ++ last_bit = 64 - clz64(bitmap_copy); ++ ++ /* ++ * For bandwidth, first_bit record the base unit of bandwidth bits, ++ * last_bit record the last bit of the max bandwidth. The max ++ * compressed bandwidth should be less than 0xFFFF (UINT16_MAX) ++ */ ++ if ((last_bit - first_bit) > UINT16_BITS || ++ max_entry >= UINT16_MAX) { ++ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d " ++ "and target=%d should not differ from previously " ++ "entered values on more than %d", node->bandwidth, ++ node->initiator, node->target, UINT16_MAX - 1); ++ return; ++ } else { ++ hmat_lb->base = temp_base; ++ hmat_lb->range_bitmap = bitmap_copy; ++ } ++ ++ /* ++ * Set lb_info_provided bit 1 as 1, ++ * bandwidth information is provided ++ */ ++ numa_info[node->target].lb_info_provided |= BIT(1); ++ } ++ lb_data.data = node->bandwidth; ++ } else { ++ assert(0); ++ } ++ ++ g_array_append_val(hmat_lb->list, lb_data); ++} ++ + void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) + { + Error *err = NULL; +@@ -231,6 +412,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) + machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), + &err); + break; ++ case NUMA_OPTIONS_TYPE_HMAT_LB: ++ if (!ms->numa_state->hmat_enabled) { ++ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " ++ "(HMAT) is disabled, enable it with -machine hmat=on " ++ "before using any of hmat specific options"); ++ return; ++ } ++ ++ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err); ++ if (err) { ++ goto 
end; ++ } ++ break; + default: + abort(); + } +diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h +index 788cbec..70f93c8 100644 +--- a/include/sysemu/numa.h ++++ b/include/sysemu/numa.h +@@ -14,11 +14,34 @@ struct CPUArchId; + #define NUMA_DISTANCE_MAX 254 + #define NUMA_DISTANCE_UNREACHABLE 255 + ++/* the value of AcpiHmatLBInfo flags */ ++enum { ++ HMAT_LB_MEM_MEMORY = 0, ++ HMAT_LB_MEM_CACHE_1ST_LEVEL = 1, ++ HMAT_LB_MEM_CACHE_2ND_LEVEL = 2, ++ HMAT_LB_MEM_CACHE_3RD_LEVEL = 3, ++ HMAT_LB_LEVELS /* must be the last entry */ ++}; ++ ++/* the value of AcpiHmatLBInfo data type */ ++enum { ++ HMAT_LB_DATA_ACCESS_LATENCY = 0, ++ HMAT_LB_DATA_READ_LATENCY = 1, ++ HMAT_LB_DATA_WRITE_LATENCY = 2, ++ HMAT_LB_DATA_ACCESS_BANDWIDTH = 3, ++ HMAT_LB_DATA_READ_BANDWIDTH = 4, ++ HMAT_LB_DATA_WRITE_BANDWIDTH = 5, ++ HMAT_LB_TYPES /* must be the last entry */ ++}; ++ ++#define UINT16_BITS 16 ++ + struct NodeInfo { + uint64_t node_mem; + struct HostMemoryBackend *node_memdev; + bool present; + bool has_cpu; ++ uint8_t lb_info_provided; + uint16_t initiator; + uint8_t distance[MAX_NODES]; + }; +@@ -28,6 +51,31 @@ struct NumaNodeMem { + uint64_t node_plugged_mem; + }; + ++struct HMAT_LB_Data { ++ uint8_t initiator; ++ uint8_t target; ++ uint64_t data; ++}; ++typedef struct HMAT_LB_Data HMAT_LB_Data; ++ ++struct HMAT_LB_Info { ++ /* Indicates it's memory or the specified level memory side cache. */ ++ uint8_t hierarchy; ++ ++ /* Present the type of data, access/read/write latency or bandwidth. 
*/ ++ uint8_t data_type; ++ ++ /* The range bitmap of bandwidth for calculating common base */ ++ uint64_t range_bitmap; ++ ++ /* The common base unit for latencies or bandwidths */ ++ uint64_t base; ++ ++ /* Array to store the latencies or bandwidths */ ++ GArray *list; ++}; ++typedef struct HMAT_LB_Info HMAT_LB_Info; ++ + struct NumaState { + /* Number of NUMA nodes */ + int num_nodes; +@@ -40,11 +88,16 @@ struct NumaState { + + /* NUMA nodes information */ + NodeInfo nodes[MAX_NODES]; ++ ++ /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ ++ HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; + }; + typedef struct NumaState NumaState; + + void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); + void parse_numa_opts(MachineState *ms); ++void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, ++ Error **errp); + void numa_complete_configuration(MachineState *ms); + void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); + extern QemuOptsList qemu_numa_opts; +diff --git a/qapi/machine.json b/qapi/machine.json +index 27d0e37..cf8faf5 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -426,10 +426,12 @@ + # + # @cpu: property based CPU(s) to node mapping (Since: 2.10) + # ++# @hmat-lb: memory latency and bandwidth information (Since: 5.0) ++# + # Since: 2.1 + ## + { 'enum': 'NumaOptionsType', +- 'data': [ 'node', 'dist', 'cpu' ] } ++ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] } + + ## + # @NumaOptions: +@@ -444,7 +446,8 @@ + 'data': { + 'node': 'NumaNodeOptions', + 'dist': 'NumaDistOptions', +- 'cpu': 'NumaCpuOptions' }} ++ 'cpu': 'NumaCpuOptions', ++ 'hmat-lb': 'NumaHmatLBOptions' }} + + ## + # @NumaNodeOptions: +@@ -558,6 +561,92 @@ + 'data' : {} } + + ## ++# @HmatLBMemoryHierarchy: ++# ++# The memory hierarchy in the System Locality Latency and Bandwidth ++# Information Structure of HMAT (Heterogeneous Memory Attribute Table) ++# ++# For more information about @HmatLBMemoryHierarchy, 
see chapter ++# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec. ++# ++# @memory: the structure represents the memory performance ++# ++# @first-level: first level of memory side cache ++# ++# @second-level: second level of memory side cache ++# ++# @third-level: third level of memory side cache ++# ++# Since: 5.0 ++## ++{ 'enum': 'HmatLBMemoryHierarchy', ++ 'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] } ++ ++## ++# @HmatLBDataType: ++# ++# Data type in the System Locality Latency and Bandwidth ++# Information Structure of HMAT (Heterogeneous Memory Attribute Table) ++# ++# For more information about @HmatLBDataType, see chapter ++# 5.2.27.4: Table 5-146: Field "Data Type" of ACPI 6.3 spec. ++# ++# @access-latency: access latency (nanoseconds) ++# ++# @read-latency: read latency (nanoseconds) ++# ++# @write-latency: write latency (nanoseconds) ++# ++# @access-bandwidth: access bandwidth (Bytes per second) ++# ++# @read-bandwidth: read bandwidth (Bytes per second) ++# ++# @write-bandwidth: write bandwidth (Bytes per second) ++# ++# Since: 5.0 ++## ++{ 'enum': 'HmatLBDataType', ++ 'data': [ 'access-latency', 'read-latency', 'write-latency', ++ 'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] } ++ ++## ++# @NumaHmatLBOptions: ++# ++# Set the system locality latency and bandwidth information ++# between Initiator and Target proximity Domains. ++# ++# For more information about @NumaHmatLBOptions, see chapter ++# 5.2.27.4: Table 5-146 of ACPI 6.3 spec. ++# ++# @initiator: the Initiator Proximity Domain. ++# ++# @target: the Target Proximity Domain. ++# ++# @hierarchy: the Memory Hierarchy. Indicates the performance ++# of memory or side cache. ++# ++# @data-type: presents the type of data, access/read/write ++# latency or hit latency. ++# ++# @latency: the value of latency from @initiator to @target ++# proximity domain, the latency unit is "ns(nanosecond)". 
++# ++# @bandwidth: the value of bandwidth between @initiator and @target ++# proximity domain, the bandwidth unit is ++# "Bytes per second". ++# ++# Since: 5.0 ++## ++{ 'struct': 'NumaHmatLBOptions', ++ 'data': { ++ 'initiator': 'uint16', ++ 'target': 'uint16', ++ 'hierarchy': 'HmatLBMemoryHierarchy', ++ 'data-type': 'HmatLBDataType', ++ '*latency': 'uint64', ++ '*bandwidth': 'size' }} ++ ++## + # @HostMemPolicy: + # + # Host memory policy types +diff --git a/qemu-options.hx b/qemu-options.hx +index e2ce754..86d9d8a 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa, + "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" + "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" + "-numa dist,src=source,dst=destination,val=distance\n" +- "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n", ++ "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n" ++ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n", + QEMU_ARCH_ALL) + STEXI + @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] + @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] + @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} + @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] ++@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}] + @findex -numa + Define a NUMA node and assign RAM and VCPUs to it. + Set the NUMA distance from a source node to a destination node. ++Set the ACPI Heterogeneous Memory Attributes for the given nodes. 
+ + Legacy VCPU assignment uses @samp{cpus} option where + @var{firstcpu} and @var{lastcpu} are CPU indexes. Each +@@ -256,6 +259,48 @@ specified resources, it just assigns existing resources to NUMA + nodes. This means that one still has to use the @option{-m}, + @option{-smp} options to allocate RAM and VCPUs respectively. + ++Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information ++between initiator and target NUMA nodes in ACPI Heterogeneous Attribute Memory Table (HMAT). ++Initiator NUMA node can create memory requests, usually it has one or more processors. ++Target NUMA node contains addressable memory. ++ ++In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory ++hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure ++represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level', ++this structure represents aggregated performance of memory side caches for each domain. ++@var{type} of 'data-type' is type of data represented by this structure instance: ++if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write' ++bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level', ++'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the ++target memory side cache. ++ ++@var{lat} is latency value in nanoseconds. @var{bw} is bandwidth value, ++the possible value and units are NUM[M|G|T], mean that the bandwidth value are ++NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix). ++Note that if latency or bandwidth value is 0, means the corresponding latency or ++bandwidth information is not provided. ++ ++For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and ++a ram, node 1 has only a ram. 
The processors in node 0 access memory in node ++0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s; ++The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10 ++nanoseconds, access-bandwidth is 100 MB/s. ++@example ++-machine hmat=on \ ++-m 2G \ ++-object memory-backend-ram,size=1G,id=m0 \ ++-object memory-backend-ram,size=1G,id=m1 \ ++-smp 2 \ ++-numa node,nodeid=0,memdev=m0 \ ++-numa node,nodeid=1,memdev=m1,initiator=0 \ ++-numa cpu,node-id=0,socket-id=0 \ ++-numa cpu,node-id=0,socket-id=1 \ ++-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \ ++-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \ ++-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \ ++-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M ++@end example ++ + ETEXI + + DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch new file mode 100644 index 0000000..a17db22 --- /dev/null +++ b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch @@ -0,0 +1,326 @@ +From 8cd3544b1347b248b9d04eb3d6c9b9bde3a13655 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:49 +0100 +Subject: [PATCH 06/12] numa: Extend CLI to provide memory side cache + information + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-6-plai@redhat.com> +Patchwork-id: 96740 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 05/11] numa: Extend CLI to provide memory side cache information +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Liu Jingqi + +Add -numa hmat-cache option to provide Memory Side Cache Information. 
+These memory attributes help to build Memory Side Cache Information +Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT). +Before using hmat-cache option, enable HMAT with -machine hmat=on. + +Acked-by: Markus Armbruster +Signed-off-by: Liu Jingqi +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-4-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +(cherry picked from commit c412a48d4d91e8f8b89aae02de0f44f1f0b729e5) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/numa.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ + include/sysemu/numa.h | 5 ++++ + qapi/machine.json | 81 +++++++++++++++++++++++++++++++++++++++++++++++++-- + qemu-options.hx | 17 +++++++++-- + 4 files changed, 179 insertions(+), 4 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index 58fe713..0d1b4be 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -375,6 +375,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + g_array_append_val(hmat_lb->list, lb_data); + } + ++void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, ++ Error **errp) ++{ ++ int nb_numa_nodes = ms->numa_state->num_nodes; ++ NodeInfo *numa_info = ms->numa_state->nodes; ++ NumaHmatCacheOptions *hmat_cache = NULL; ++ ++ if (node->node_id >= nb_numa_nodes) { ++ error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less " ++ "than %d", node->node_id, nb_numa_nodes); ++ return; ++ } ++ ++ if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) { ++ error_setg(errp, "The latency and bandwidth information of " ++ "node-id=%" PRIu32 " should be provided before memory side " ++ "cache attributes", node->node_id); ++ return; ++ } ++ ++ if (node->level < 1 || node->level >= HMAT_LB_LEVELS) { ++ error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 " ++ "and less than or equal to %d", node->level, ++ HMAT_LB_LEVELS - 1); ++ 
return; ++ } ++ ++ assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX); ++ assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX); ++ if (ms->numa_state->hmat_cache[node->node_id][node->level]) { ++ error_setg(errp, "Duplicate configuration of the side cache for " ++ "node-id=%" PRIu32 " and level=%" PRIu8, ++ node->node_id, node->level); ++ return; ++ } ++ ++ if ((node->level > 1) && ++ ms->numa_state->hmat_cache[node->node_id][node->level - 1] && ++ (node->size >= ++ ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) { ++ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 ++ " should be less than the size(%" PRIu64 ") of " ++ "level=%u", node->size, node->level, ++ ms->numa_state->hmat_cache[node->node_id] ++ [node->level - 1]->size, ++ node->level - 1); ++ return; ++ } ++ ++ if ((node->level < HMAT_LB_LEVELS - 1) && ++ ms->numa_state->hmat_cache[node->node_id][node->level + 1] && ++ (node->size <= ++ ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) { ++ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 ++ " should be larger than the size(%" PRIu64 ") of " ++ "level=%u", node->size, node->level, ++ ms->numa_state->hmat_cache[node->node_id] ++ [node->level + 1]->size, ++ node->level + 1); ++ return; ++ } ++ ++ hmat_cache = g_malloc0(sizeof(*hmat_cache)); ++ memcpy(hmat_cache, node, sizeof(*hmat_cache)); ++ ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache; ++} ++ + void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) + { + Error *err = NULL; +@@ -425,6 +492,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) + goto end; + } + break; ++ case NUMA_OPTIONS_TYPE_HMAT_CACHE: ++ if (!ms->numa_state->hmat_enabled) { ++ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " ++ "(HMAT) is disabled, enable it with -machine hmat=on " ++ "before using any of hmat specific options"); ++ return; ++ } ++ ++ 
parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err); ++ if (err) { ++ goto end; ++ } ++ break; + default: + abort(); + } +diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h +index 70f93c8..ba693cc 100644 +--- a/include/sysemu/numa.h ++++ b/include/sysemu/numa.h +@@ -91,6 +91,9 @@ struct NumaState { + + /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ + HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; ++ ++ /* Memory Side Cache Information Structure */ ++ NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS]; + }; + typedef struct NumaState NumaState; + +@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); + void parse_numa_opts(MachineState *ms); + void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + Error **errp); ++void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, ++ Error **errp); + void numa_complete_configuration(MachineState *ms); + void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); + extern QemuOptsList qemu_numa_opts; +diff --git a/qapi/machine.json b/qapi/machine.json +index cf8faf5..b3d30bc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -428,10 +428,12 @@ + # + # @hmat-lb: memory latency and bandwidth information (Since: 5.0) + # ++# @hmat-cache: memory side cache information (Since: 5.0) ++# + # Since: 2.1 + ## + { 'enum': 'NumaOptionsType', +- 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] } ++ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] } + + ## + # @NumaOptions: +@@ -447,7 +449,8 @@ + 'node': 'NumaNodeOptions', + 'dist': 'NumaDistOptions', + 'cpu': 'NumaCpuOptions', +- 'hmat-lb': 'NumaHmatLBOptions' }} ++ 'hmat-lb': 'NumaHmatLBOptions', ++ 'hmat-cache': 'NumaHmatCacheOptions' }} + + ## + # @NumaNodeOptions: +@@ -647,6 +650,80 @@ + '*bandwidth': 'size' }} + + ## ++# @HmatCacheAssociativity: ++# ++# Cache associativity in the Memory Side Cache Information Structure ++# of HMAT ++# ++# For 
more information of @HmatCacheAssociativity, see chapter ++# 5.2.27.5: Table 5-147 of ACPI 6.3 spec. ++# ++# @none: None (no memory side cache in this proximity domain, ++# or cache associativity unknown) ++# ++# @direct: Direct Mapped ++# ++# @complex: Complex Cache Indexing (implementation specific) ++# ++# Since: 5.0 ++## ++{ 'enum': 'HmatCacheAssociativity', ++ 'data': [ 'none', 'direct', 'complex' ] } ++ ++## ++# @HmatCacheWritePolicy: ++# ++# Cache write policy in the Memory Side Cache Information Structure ++# of HMAT ++# ++# For more information of @HmatCacheWritePolicy, see chapter ++# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec. ++# ++# @none: None (no memory side cache in this proximity domain, ++# or cache write policy unknown) ++# ++# @write-back: Write Back (WB) ++# ++# @write-through: Write Through (WT) ++# ++# Since: 5.0 ++## ++{ 'enum': 'HmatCacheWritePolicy', ++ 'data': [ 'none', 'write-back', 'write-through' ] } ++ ++## ++# @NumaHmatCacheOptions: ++# ++# Set the memory side cache information for a given memory domain. ++# ++# For more information of @NumaHmatCacheOptions, see chapter ++# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec. ++# ++# @node-id: the memory proximity domain to which the memory belongs. ++# ++# @size: the size of memory side cache in bytes. ++# ++# @level: the cache level described in this structure. ++# ++# @associativity: the cache associativity, ++# none/direct-mapped/complex(complex cache indexing). ++# ++# @policy: the write policy, none/write-back/write-through. ++# ++# @line: the cache Line size in bytes. 
++# ++# Since: 5.0 ++## ++{ 'struct': 'NumaHmatCacheOptions', ++ 'data': { ++ 'node-id': 'uint32', ++ 'size': 'size', ++ 'level': 'uint8', ++ 'associativity': 'HmatCacheAssociativity', ++ 'policy': 'HmatCacheWritePolicy', ++ 'line': 'uint16' }} ++ ++## + # @HostMemPolicy: + # + # Host memory policy types +diff --git a/qemu-options.hx b/qemu-options.hx +index 86d9d8a..8fe05b6 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa, + "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" + "-numa dist,src=source,dst=destination,val=distance\n" + "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n" +- "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n", ++ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n" ++ "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n", + QEMU_ARCH_ALL) + STEXI + @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] +@@ -177,6 +178,7 @@ STEXI + @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} + @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] + @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}] ++@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}] + @findex -numa + Define a NUMA node and assign RAM and VCPUs to it. + Set the NUMA distance from a source node to a destination node. 
+@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix). + Note that if latency or bandwidth value is 0, means the corresponding latency or + bandwidth information is not provided. + ++In @samp{hmat-cache} option, @var{node-id} is the NUMA-id of the memory belongs. ++@var{size} is the size of memory side cache in bytes. @var{level} is the cache ++level described in this structure, note that the cache level 0 should not be used ++with @samp{hmat-cache} option. @var{associativity} is the cache associativity, ++the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'. ++@var{policy} is the write policy. @var{line} is the cache Line size in bytes. ++ + For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and + a ram, node 1 has only a ram. The processors in node 0 access memory in node + 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s; + The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10 + nanoseconds, access-bandwidth is 100 MB/s. ++And for memory side cache information, NUMA node 0 and 1 both have 1 level memory ++cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes: + @example + -machine hmat=on \ + -m 2G \ +@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s. 
+ -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \ + -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \ + -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \ +--numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M ++-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \ ++-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \ ++-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8 + @end example + + ETEXI +-- +1.8.3.1 + diff --git a/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch b/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch new file mode 100644 index 0000000..c602256 --- /dev/null +++ b/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch @@ -0,0 +1,81 @@ +From e3a1c2ff0d7b930b1782d59d093fd15471d3aee1 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:46 +0100 +Subject: [PATCH 03/12] numa: properly check if numa is supported + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-3-plai@redhat.com> +Patchwork-id: 96732 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 02/11] numa: properly check if numa is supported +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Igor Mammedov + +Commit aa57020774b, by mistake used MachineClass::numa_mem_supported +to check if NUMA is supported by machine and also as unrelated change +set it to true for sbsa-ref board. + +Luckily change didn't break machines that support NUMA, as the field +is set to true for them. + +But the field is not intended for checking if NUMA is supported and +will be flipped to false within this release for new machine types. 
+ +Fix it: + - by using previously used condition + !mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id + the first time and then use MachineState::numa_state down the road + to check if NUMA is supported + - dropping stray sbsa-ref chunk + +Fixes: aa57020774b690a22be72453b8e91c9b5a68c516 +Signed-off-by: Igor Mammedov +Message-Id: <1576154936-178362-3-git-send-email-imammedo@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit fcd3f2cc124600385dba46c69a80626985c15b50) +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/sbsa-ref.c | 1 - + hw/core/machine.c | 4 ++-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c +index 27046cc..c6261d4 100644 +--- a/hw/arm/sbsa-ref.c ++++ b/hw/arm/sbsa-ref.c +@@ -791,7 +791,6 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) + mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; + mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; +- mc->numa_mem_supported = true; + } + + static const TypeInfo sbsa_ref_info = { +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5a025d1..19c78c6 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1128,7 +1128,7 @@ static void machine_initfn(Object *obj) + NULL); + } + +- if (mc->numa_mem_supported) { ++ if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { + ms->numa_state = g_new0(NumaState, 1); + } + +@@ -1272,7 +1272,7 @@ void machine_run_board_init(MachineState *machine) + { + MachineClass *machine_class = MACHINE_GET_CLASS(machine); + +- if (machine_class->numa_mem_supported) { ++ if (machine->numa_state) { + numa_complete_configuration(machine); + if (machine->numa_state->num_nodes) { + machine_numa_finish_cpu_init(machine); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-numa-remove-not-needed-check.patch b/SOURCES/kvm-numa-remove-not-needed-check.patch new file mode 100644 index 
0000000..cbe677f --- /dev/null +++ b/SOURCES/kvm-numa-remove-not-needed-check.patch @@ -0,0 +1,59 @@ +From 348115bbd0d60fada6f7d9fa27848044690a4bc3 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:45 +0100 +Subject: [PATCH 02/12] numa: remove not needed check + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-2-plai@redhat.com> +Patchwork-id: 96738 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 01/11] numa: remove not needed check +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Igor Mammedov + +Currently parse_numa_node() is always called from already numa +enabled context. +Drop unnecessary check if numa is supported. + +Signed-off-by: Igor Mammedov +Message-Id: <1576154936-178362-2-git-send-email-imammedo@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 5275db59aa7ff8a26bd6aa5d07cb4d53de5cfab5) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/numa.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e3332a9..19f082d 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -83,10 +83,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, + return; + } + +- if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) { +- error_setg(errp, "NUMA is not supported by this machine-type"); +- return; +- } + for (cpus = node->cpus; cpus; cpus = cpus->next) { + CpuInstanceProperties props; + if (cpus->value >= max_cpus) { +@@ -178,9 +174,8 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp) + void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) + { + Error *err = NULL; +- MachineClass *mc = MACHINE_GET_CLASS(ms); + +- if (!mc->numa_mem_supported) { ++ if (!ms->numa_state) { + error_setg(errp, "NUMA is not supported by this machine-type"); + goto end; + } +-- +1.8.3.1 + 
diff --git a/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch b/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch new file mode 100644 index 0000000..9c45e92 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch @@ -0,0 +1,75 @@ +From 38ba55dd27a3b8308f0ce2e82a4c3eba3f197d20 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:53 -0400 +Subject: [PATCH 11/42] pc-bios/s390x: Fix reset psw mask + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-12-thuth@redhat.com> +Patchwork-id: 97034 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 11/38] pc-bios/s390x: Fix reset psw mask +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +We need to set the short psw indication bit in the reset psw, as it is +a short psw. + +Exposed by "s390x: Properly fetch and test the short psw on diag308 +subc 0/1". + +Fixes: 962982329029 ("pc-bios/s390-ccw: do a subsystem reset before running the guest") +Signed-off-by: Janosch Frank +Message-Id: <20191203132813.2734-5-frankja@linux.ibm.com> +Acked-by: Christian Borntraeger +Signed-off-by: Cornelia Huck +(cherry picked from commit 5c6f0d5f46a77d77460dfb518cf1e1e4145c276e) +Signed-off-by: Danilo C. L. 
de Paula +--- + pc-bios/s390-ccw/jump2ipl.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index 266f1502b9..da13c43cc0 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -12,11 +12,11 @@ + #define KERN_IMAGE_START 0x010000UL + #define PSW_MASK_64 0x0000000100000000ULL + #define PSW_MASK_32 0x0000000080000000ULL +-#define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64) ++#define PSW_MASK_SHORTPSW 0x0008000000000000ULL ++#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_32 | PSW_MASK_64) + + typedef struct ResetInfo { +- uint32_t ipl_mask; +- uint32_t ipl_addr; ++ uint64_t ipl_psw; + uint32_t ipl_continue; + } ResetInfo; + +@@ -50,7 +50,9 @@ void jump_to_IPL_code(uint64_t address) + ResetInfo *current = 0; + + save = *current; +- current->ipl_addr = (uint32_t) (uint64_t) &jump_to_IPL_2; ++ ++ current->ipl_psw = (uint64_t) &jump_to_IPL_2; ++ current->ipl_psw |= RESET_PSW_MASK; + current->ipl_continue = address & 0x7fffffff; + + debug_print_int("set IPL addr to", current->ipl_continue); +@@ -82,7 +84,7 @@ void jump_to_low_kernel(void) + } + + /* Trying to get PSW at zero address */ +- if (*((uint64_t *)0) & IPL_PSW_MASK) { ++ if (*((uint64_t *)0) & RESET_PSW_MASK) { + jump_to_IPL_code((*((uint64_t *)0)) & 0x7fffffff); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch b/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch new file mode 100644 index 0000000..2db2f93 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch @@ -0,0 +1,145 @@ +From 8350ad9c0f54519a06ec396c2997330615f4b470 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:58 -0400 +Subject: [PATCH 16/42] pc-bios: s390x: Save iplb location in lowcore + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-17-thuth@redhat.com> +Patchwork-id: 97027 +O-Subject: [RHEL-8.3.0 qemu-kvm 
PATCH v2 16/38] pc-bios: s390x: Save iplb location in lowcore +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +The POP states that for a list directed IPL the IPLB is stored into +memory by the machine loader and its address is stored at offset 0x14 +of the lowcore. + +ZIPL currently uses the address in offset 0x14 to access the IPLB and +acquire flags about secure boot. If the IPLB address points into +memory which has an unsupported mix of flags set, ZIPL will panic +instead of booting the OS. + +As the lowcore can have quite a high entropy for a guest that did drop +out of protected mode (i.e. rebooted) we encountered the ZIPL panic +quite often. + +Signed-off-by: Janosch Frank +Tested-by: Marc Hartmayer +Message-Id: <20200304114231.23493-19-frankja@linux.ibm.com> +Reviewed-by: Christian Borntraeger +Reviewed-by: David Hildenbrand +Signed-off-by: Christian Borntraeger +(cherry picked from commit 9bfc04f9ef6802fff0fc77130ff345a541783363) +Signed-off-by: Danilo C. L. 
de Paula +--- + pc-bios/s390-ccw/jump2ipl.c | 1 + + pc-bios/s390-ccw/main.c | 8 +++++++- + pc-bios/s390-ccw/netmain.c | 1 + + pc-bios/s390-ccw/s390-arch.h | 10 ++++++++-- + pc-bios/s390-ccw/s390-ccw.h | 1 + + 5 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index da13c43cc0..4eba2510b0 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -35,6 +35,7 @@ void jump_to_IPL_code(uint64_t address) + { + /* store the subsystem information _after_ the bootmap was loaded */ + write_subsystem_identification(); ++ write_iplb_location(); + + /* prevent unknown IPL types in the guest */ + if (iplb.pbt == S390_IPL_TYPE_QEMU_SCSI) { +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index a21b386280..4e65b411e1 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -9,6 +9,7 @@ + */ + + #include "libc.h" ++#include "helper.h" + #include "s390-arch.h" + #include "s390-ccw.h" + #include "cio.h" +@@ -22,7 +23,7 @@ QemuIplParameters qipl; + IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE))); + static bool have_iplb; + static uint16_t cutype; +-LowCore const *lowcore; /* Yes, this *is* a pointer to address 0 */ ++LowCore *lowcore; /* Yes, this *is* a pointer to address 0 */ + + #define LOADPARM_PROMPT "PROMPT " + #define LOADPARM_EMPTY " " +@@ -42,6 +43,11 @@ void write_subsystem_identification(void) + *zeroes = 0; + } + ++void write_iplb_location(void) ++{ ++ lowcore->ptr_iplb = ptr2u32(&iplb); ++} ++ + void panic(const char *string) + { + sclp_print(string); +diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c +index f2dcc01e27..309ffa30d9 100644 +--- a/pc-bios/s390-ccw/netmain.c ++++ b/pc-bios/s390-ccw/netmain.c +@@ -40,6 +40,7 @@ + #define DEFAULT_TFTP_RETRIES 20 + + extern char _start[]; ++void write_iplb_location(void) {} + + #define KERNEL_ADDR ((void *)0L) + #define KERNEL_MAX_SIZE ((long)_start) +diff --git 
a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h +index 504fc7c2f0..5f36361c02 100644 +--- a/pc-bios/s390-ccw/s390-arch.h ++++ b/pc-bios/s390-ccw/s390-arch.h +@@ -36,7 +36,13 @@ typedef struct LowCore { + /* prefix area: defined by architecture */ + PSWLegacy ipl_psw; /* 0x000 */ + uint32_t ccw1[2]; /* 0x008 */ +- uint32_t ccw2[2]; /* 0x010 */ ++ union { ++ uint32_t ccw2[2]; /* 0x010 */ ++ struct { ++ uint32_t reserved10; ++ uint32_t ptr_iplb; ++ }; ++ }; + uint8_t pad1[0x80 - 0x18]; /* 0x018 */ + uint32_t ext_params; /* 0x080 */ + uint16_t cpu_addr; /* 0x084 */ +@@ -85,7 +91,7 @@ typedef struct LowCore { + PSW io_new_psw; /* 0x1f0 */ + } __attribute__((packed, aligned(8192))) LowCore; + +-extern LowCore const *lowcore; ++extern LowCore *lowcore; + + static inline void set_prefix(uint32_t address) + { +diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h +index 11bce7d73c..21f27e7990 100644 +--- a/pc-bios/s390-ccw/s390-ccw.h ++++ b/pc-bios/s390-ccw/s390-ccw.h +@@ -57,6 +57,7 @@ void consume_io_int(void); + /* main.c */ + void panic(const char *string); + void write_subsystem_identification(void); ++void write_iplb_location(void); + extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); + unsigned int get_loadparm_index(void); + +-- +2.27.0 + diff --git a/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch b/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch new file mode 100644 index 0000000..57f3c3b --- /dev/null +++ b/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch @@ -0,0 +1,153 @@ +From 8587278a20283851081d4d282d11ef6bafd17dc2 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Tue, 17 Mar 2020 13:56:39 -0400 +Subject: [PATCH 1/2] pcie_root_port: Add hotplug disabling option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Julia Suvorova +Message-id: <20200317135639.65085-1-jusual@redhat.com> +Patchwork-id: 94367 
+O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] pcie_root_port: Add hotplug disabling option +Bugzilla: 1790899 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Peter Xu + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1790899 +BRANCH: rhel-av-8.2.1 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27302449 + +Make hot-plug/hot-unplug on PCIe Root Ports optional to allow libvirt +manage it and restrict unplug for the whole machine. This is going to +prevent user-initiated unplug in guests (Windows mostly). +Hotplug is enabled by default. +Usage: + -device pcie-root-port,hotplug=off,... + +If you want to disable hot-unplug on some downstream ports of one +switch, disable hot-unplug on PCIe Root Port connected to the upstream +port as well as on the selected downstream ports. + +Discussion related: + https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg00530.html + +Signed-off-by: Julia Suvorova +Message-Id: <20200226174607.205941-1-jusual@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Ján Tomko +(cherry picked from commit 530a0963184e57e71a5b538e9161f115df533e96) +Signed-off-by: Jon Maloy +--- + hw/pci-bridge/pcie_root_port.c | 2 +- + hw/pci-bridge/xio3130_downstream.c | 2 +- + hw/pci/pcie.c | 11 +++++++---- + hw/pci/pcie_port.c | 1 + + include/hw/pci/pcie.h | 2 +- + include/hw/pci/pcie_port.h | 3 +++ + 6 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c +index 012c2cb12c..db80e2ec23 100644 +--- a/hw/pci-bridge/pcie_root_port.c ++++ b/hw/pci-bridge/pcie_root_port.c +@@ -94,7 +94,7 @@ static void rp_realize(PCIDevice *d, Error **errp) + + pcie_cap_arifwd_init(d); + pcie_cap_deverr_init(d); +- pcie_cap_slot_init(d, s->slot); ++ pcie_cap_slot_init(d, s); + pcie_cap_root_init(d); + + pcie_chassis_create(s->chassis); +diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c +index a9f084b863..4489ce4a40 100644 +--- a/hw/pci-bridge/xio3130_downstream.c ++++ b/hw/pci-bridge/xio3130_downstream.c +@@ -94,7 +94,7 @@ static void xio3130_downstream_realize(PCIDevice *d, Error **errp) + } + pcie_cap_flr_init(d); + pcie_cap_deverr_init(d); +- pcie_cap_slot_init(d, s->slot); ++ pcie_cap_slot_init(d, s); + pcie_cap_arifwd_init(d); + + pcie_chassis_create(s->chassis); +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 08718188bb..0eb3a2a5d2 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -495,7 +495,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + + /* pci express slot for pci express root/downstream port + PCI express capability slot registers */ +-void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) ++void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) + { + uint32_t pos = dev->exp.exp_cap; + +@@ -505,13 +505,16 @@ void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) + pci_long_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCAP, + ~PCI_EXP_SLTCAP_PSN); + pci_long_test_and_set_mask(dev->config + 
pos + PCI_EXP_SLTCAP, +- (slot << PCI_EXP_SLTCAP_PSN_SHIFT) | ++ (s->slot << PCI_EXP_SLTCAP_PSN_SHIFT) | + PCI_EXP_SLTCAP_EIP | +- PCI_EXP_SLTCAP_HPS | +- PCI_EXP_SLTCAP_HPC | + PCI_EXP_SLTCAP_PIP | + PCI_EXP_SLTCAP_AIP | + PCI_EXP_SLTCAP_ABP); ++ if (s->hotplug) { ++ pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, ++ PCI_EXP_SLTCAP_HPS | ++ PCI_EXP_SLTCAP_HPC); ++ } + + if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, +diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c +index c19a9be592..36dac33d98 100644 +--- a/hw/pci/pcie_port.c ++++ b/hw/pci/pcie_port.c +@@ -147,6 +147,7 @@ static const TypeInfo pcie_port_type_info = { + static Property pcie_slot_props[] = { + DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), + DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), ++ DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 7064875835..14c58ebdb6 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -104,7 +104,7 @@ void pcie_cap_deverr_reset(PCIDevice *dev); + void pcie_cap_lnkctl_init(PCIDevice *dev); + void pcie_cap_lnkctl_reset(PCIDevice *dev); + +-void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot); ++void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s); + void pcie_cap_slot_reset(PCIDevice *dev); + void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta); + void pcie_cap_slot_write_config(PCIDevice *dev, +diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h +index 7515430087..7072cc8731 100644 +--- a/include/hw/pci/pcie_port.h ++++ b/include/hw/pci/pcie_port.h +@@ -55,6 +55,9 @@ struct PCIESlot { + + /* Disable ACS (really for a pcie_root_port) */ + bool disable_acs; ++ ++ /* Indicates whether hot-plug is enabled on the slot */ ++ bool hotplug; + QLIST_ENTRY(PCIESlot) next; + }; + +-- +2.18.2 + diff --git 
a/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch new file mode 100644 index 0000000..2dbdb16 --- /dev/null +++ b/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch @@ -0,0 +1,107 @@ +From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:11 +0000 +Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on + reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-2-dgibson@redhat.com> +Patchwork-id: 93429 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +When a CPU is reset, QEMU makes sure no interrupt is pending by clearing +CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a +complete machine emulation, eg. a sPAPR machine, an external interrupt +request could still be pending in KVM though, eg. an IPI. It will be +eventually presented to the guest, which is supposed to acknowledge it at +the interrupt controller. If the interrupt controller is emulated in QEMU, +either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt +pin in KVM since it isn't pending anymore for QEMU. When the vCPU re-enters +the guest, the interrupt request is still pending and the vCPU will try +again to acknowledge it. This causes an infinite loop and eventually hangs +the guest. + +The code has been broken since the beginning. The issue wasn't hit before +because accel=kvm,kernel-irqchip=off is an awkward setup that never got +used until recently with the LC92x IBM systems (aka, Boston). + +Add a ppc_irq_reset() function to do the necessary cleanup, ie. 
deassert +the IRQ pins of the CPU in QEMU and most importantly the external interrupt +pin for this vCPU in KVM. + +Reported-by: Satheesh Rajendran +Signed-off-by: Greg Kurz +Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/ppc.c | 8 ++++++++ + include/hw/ppc/ppc.h | 2 ++ + target/ppc/translate_init.inc.c | 1 + + 3 files changed, 11 insertions(+) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index 52a18eb..d554b64 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) + + return NULL; + } ++ ++void ppc_irq_reset(PowerPCCPU *cpu) ++{ ++ CPUPPCState *env = &cpu->env; ++ ++ env->irq_input_state = 0; ++ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); ++} +diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h +index 4bdcb8b..5dd7531 100644 +--- a/include/hw/ppc/ppc.h ++++ b/include/hw/ppc/ppc.h +@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} + static inline void ppce500_irq_init(PowerPCCPU *cpu) {} ++static inline void ppc_irq_reset(PowerPCCPU *cpu) {} + #else + void ppc40x_irq_init(PowerPCCPU *cpu); + void ppce500_irq_init(PowerPCCPU *cpu); +@@ -83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); + void ppc970_irq_init(PowerPCCPU *cpu); + void ppcPOWER7_irq_init(PowerPCCPU *cpu); + void ppcPOWER9_irq_init(PowerPCCPU *cpu); ++void ppc_irq_reset(PowerPCCPU *cpu); + #endif + + /* PPC machines for OpenBIOS */ +diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c +index ba726de..64a8380 100644 +--- a/target/ppc/translate_init.inc.c ++++ b/target/ppc/translate_init.inc.c +@@ 
-10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) + env->pending_interrupts = 0; + s->exception_index = POWERPC_EXCP_NONE; + env->error_code = 0; ++ ppc_irq_reset(cpu); + + /* tininess for underflow is detected before rounding */ + set_float_detect_tininess(float_tininess_before_rounding, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch new file mode 100644 index 0000000..457d149 --- /dev/null +++ b/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch @@ -0,0 +1,112 @@ +From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:13 +0000 +Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern + Book3s CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-4-dgibson@redhat.com> +Patchwork-id: 93431 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The power7_set_irq() and power9_set_irq() functions set this but it is +never used actually. Modern Book3s compatible CPUs are only supported +by the pnv and spapr machines. They have an interrupt controller, XICS +for POWER7/8 and XIVE for POWER9, whose models don't require to track +IRQ input states at the CPU level. + +Drop these lines to avoid confusion. + +Signed-off-by: Greg Kurz +Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/ppc.c | 16 ++-------------- + target/ppc/cpu.h | 4 +++- + 2 files changed, 5 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index d554b64..730a41f 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) + static void power7_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER7_INPUT_INT: +@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } + } + + void ppcPOWER7_irq_init(PowerPCCPU *cpu) +@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) + static void power9_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: +@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } + } + + void ppcPOWER9_irq_init(PowerPCCPU *cpu) +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 5c53801..8887f76 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1090,7 +1090,9 @@ struct CPUPPCState { + #if !defined(CONFIG_USER_ONLY) + /* + * This is the IRQ controller, which is implementation dependent +- * and only relevant when emulating a complete machine. ++ * and only relevant when emulating a complete machine. 
Note that ++ * this isn't used by recent Book3s compatible CPUs (POWER7 and ++ * newer). + */ + uint32_t irq_input_state; + void **irq_inputs; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch new file mode 100644 index 0000000..9c25b76 --- /dev/null +++ b/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch @@ -0,0 +1,64 @@ +From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:39 +0000 +Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for + 'blockdev-snapshot' + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-14-kwolf@redhat.com> +Patchwork-id: 94290 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Announce that 'blockdev-snapshot' command's permissions allow changing +of the backing file if the 'consistent_read' permission is not required. + +This is useful for libvirt to allow late opening of the backing chain +during a blockdev-mirror. + +Signed-off-by: Peter Krempa +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-8-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index a1e85b0..a64ad81 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1541,6 +1541,12 @@ + # + # For the arguments, see the documentation of BlockdevSnapshot.
+ # ++# Features: ++# @allow-write-only-overlay: If present, the check whether this operation is safe ++# was relaxed so that it can be used to change ++# backing file of a destination of a blockdev-mirror. ++# (since 5.0) ++# + # Since: 2.5 + # + # Example: +@@ -1561,7 +1567,8 @@ + # + ## + { 'command': 'blockdev-snapshot', +- 'data': 'BlockdevSnapshot' } ++ 'data': 'BlockdevSnapshot', ++ 'features': [ 'allow-write-only-overlay' ] } + + ## + # @change-backing-file: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch b/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch new file mode 100644 index 0000000..e925459 --- /dev/null +++ b/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch @@ -0,0 +1,239 @@ +From 08e7c4a3d0e739b8ff0f236d12e51dc394ec5b88 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:52 -0500 +Subject: [PATCH 01/10] qapi: enable use of g_autoptr with QAPI types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-2-marcandre.lureau@redhat.com> +Patchwork-id: 100520 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 01/10] qapi: enable use of g_autoptr with QAPI types +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Daniel P. Berrangé + +Currently QAPI generates a type and function for free'ing it: + + typedef struct QCryptoBlockCreateOptions QCryptoBlockCreateOptions; + void qapi_free_QCryptoBlockCreateOptions(QCryptoBlockCreateOptions *obj); + +This is used in the traditional manner: + + QCryptoBlockCreateOptions *opts = NULL; + + opts = g_new0(QCryptoBlockCreateOptions, 1); + + ....do stuff with opts... 
+ + qapi_free_QCryptoBlockCreateOptions(opts); + +Since bumping the min glib to 2.48, QEMU has incrementally adopted the +use of g_auto/g_autoptr. This allows the compiler to run a function to +free a variable when it goes out of scope, the benefit being the +compiler can guarantee it is freed in all possible code paths. + +This benefit is applicable to QAPI types too, and given the seriously +long method names for some qapi_free_XXXX() functions, is much less +typing. This change thus makes the code generator emit: + + G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlockCreateOptions, + qapi_free_QCryptoBlockCreateOptions) + +The above code example now becomes + + g_autoptr(QCryptoBlockCreateOptions) opts = NULL; + + opts = g_new0(QCryptoBlockCreateOptions, 1); + + ....do stuff with opts... + +Note, if the local pointer needs to live beyond the scope holding the +variable, then g_steal_pointer can be used. This is useful to return the +pointer to the caller in the success codepath, while letting it be freed +in all error codepaths. + + return g_steal_pointer(&opts); + +The crypto/block.h header needs updating to avoid symbol clash now that +the g_autoptr support is a standard QAPI feature. + +Signed-off-by: Daniel P. Berrangé +Message-Id: <20200723153845.2934357-1-berrange@redhat.com> +Reviewed-by: Markus Armbruster +Reviewed-by: Eric Blake +Signed-off-by: Markus Armbruster + +(cherry picked from commit 221db5daf6b3666f1c8e4ca06ae45892e99a112f) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L.
de Paula +--- + docs/devel/qapi-code-gen.txt | 2 ++ + scripts/qapi/types.py | 1 + + tests/test-qobject-input-visitor.c | 23 +++++++---------------- + 3 files changed, 10 insertions(+), 16 deletions(-) + +diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt +index 45c93a43cc3..ca59c695fac 100644 +--- a/docs/devel/qapi-code-gen.txt ++++ b/docs/devel/qapi-code-gen.txt +@@ -1278,6 +1278,7 @@ Example: + }; + + void qapi_free_UserDefOne(UserDefOne *obj); ++ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) + + struct UserDefOneList { + UserDefOneList *next; +@@ -1285,6 +1286,7 @@ Example: + }; + + void qapi_free_UserDefOneList(UserDefOneList *obj); ++ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) + + struct q_obj_my_command_arg { + UserDefOneList *arg1; +diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py +index d8751daa049..c3be141dc90 100644 +--- a/scripts/qapi/types.py ++++ b/scripts/qapi/types.py +@@ -213,6 +213,7 @@ def gen_type_cleanup_decl(name): + ret = mcgen(''' + + void qapi_free_%(c_name)s(%(c_name)s *obj); ++G_DEFINE_AUTOPTR_CLEANUP_FUNC(%(c_name)s, qapi_free_%(c_name)s) + ''', + c_name=c_name(name)) + return ret +diff --git a/tests/test-qobject-input-visitor.c b/tests/test-qobject-input-visitor.c +index 6bacabf0632..e41b91a2a6f 100644 +--- a/tests/test-qobject-input-visitor.c ++++ b/tests/test-qobject-input-visitor.c +@@ -417,7 +417,7 @@ static void test_visitor_in_struct(TestInputVisitorData *data, + static void test_visitor_in_struct_nested(TestInputVisitorData *data, + const void *unused) + { +- UserDefTwo *udp = NULL; ++ g_autoptr(UserDefTwo) udp = NULL; + Visitor *v; + + v = visitor_input_test_init(data, "{ 'string0': 'string0', " +@@ -433,8 +433,6 @@ static void test_visitor_in_struct_nested(TestInputVisitorData *data, + g_assert_cmpstr(udp->dict1->dict2->userdef->string, ==, "string"); + g_assert_cmpstr(udp->dict1->dict2->string, ==, "string2"); + g_assert(udp->dict1->has_dict3 == 
false); +- +- qapi_free_UserDefTwo(udp); + } + + static void test_visitor_in_list(TestInputVisitorData *data, +@@ -546,7 +544,7 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, + const void *unused) + { + Visitor *v; +- UserDefFlatUnion *tmp; ++ g_autoptr(UserDefFlatUnion) tmp = NULL; + UserDefUnionBase *base; + + v = visitor_input_test_init(data, +@@ -563,8 +561,6 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, + + base = qapi_UserDefFlatUnion_base(tmp); + g_assert(&base->enum1 == &tmp->enum1); +- +- qapi_free_UserDefFlatUnion(tmp); + } + + static void test_visitor_in_alternate(TestInputVisitorData *data, +@@ -690,7 +686,7 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, + const void *unused, + UserDefListUnionKind kind) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); + GString *gstr_union = g_string_new(""); +@@ -782,7 +778,6 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void test_visitor_in_list_union_int(TestInputVisitorData *data, +@@ -851,7 +846,7 @@ static void test_visitor_in_list_union_uint64(TestInputVisitorData *data, + static void test_visitor_in_list_union_bool(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + boolList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -879,13 +874,12 @@ static void test_visitor_in_list_union_bool(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void test_visitor_in_list_union_string(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ 
g_autoptr(UserDefListUnion) cvalue = NULL; + strList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -914,7 +908,6 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + #define DOUBLE_STR_MAX 16 +@@ -922,7 +915,7 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, + static void test_visitor_in_list_union_number(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + numberList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -957,7 +950,6 @@ static void test_visitor_in_list_union_number(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void input_visitor_test_add(const char *testpath, +@@ -1253,7 +1245,7 @@ static void test_visitor_in_fail_alternate(TestInputVisitorData *data, + static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, + const QLitObject *qlit) + { +- SchemaInfoList *schema = NULL; ++ g_autoptr(SchemaInfoList) schema = NULL; + QObject *obj = qobject_from_qlit(qlit); + Visitor *v; + +@@ -1262,7 +1254,6 @@ static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, + visit_type_SchemaInfoList(v, NULL, &schema, &error_abort); + g_assert(schema); + +- qapi_free_SchemaInfoList(schema); + qobject_unref(obj); + visit_free(v); + } +-- +2.27.0 + diff --git a/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch b/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch new file mode 100644 index 0000000..48c15c5 --- /dev/null +++ b/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch @@ -0,0 +1,495 @@ +From af4d66e07c86d7593f7d18ae4b6a2151123b529b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:17 
+0100 +Subject: [PATCH 12/26] qcow2: Expose bitmaps' size during measure + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-10-eblake@redhat.com> +Patchwork-id: 97072 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 09/12] qcow2: Expose bitmaps' size during measure +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +It's useful to know how much space can be occupied by qcow2 persistent +bitmaps, even though such metadata is unrelated to the guest-visible +data. Report this value as an additional QMP field, present when +measuring an existing image and output format that both support +bitmaps. Update iotest 178 and 190 to updated output, as well as new +coverage in 190 demonstrating non-zero values made possible with the +recently-added qemu-img bitmap command (see 3b51ab4b). + +The new 'bitmaps size:' field is displayed automatically as part of +'qemu-img measure' any time it is present in QMP (that is, any time +both the source image being measured and destination format support +bitmaps, even if the measurement is 0 because there are no bitmaps +present). If the field is absent, it means that no bitmaps can be +copied (source, destination, or both lack bitmaps, including when +measuring based on size rather than on a source image). This behavior +is compatible with an upcoming patch adding 'qemu-img convert +--bitmaps': that command will fail in the same situations where this +patch omits the field. + +The addition of a new field demonstrates why we should always +zero-initialize qapi C structs; while the qcow2 driver still fully +populates all fields, the raw and crypto drivers had to be tweaked to +avoid uninitialized data. + +Consideration was also given towards having a 'qemu-img measure +--bitmaps' which errors out when bitmaps are not possible, and +otherwise sums the bitmaps into the existing allocation totals rather +than displaying as a separate field, as a potential convenience +factor. 
But this was ultimately decided to be more complexity than +necessary when the QMP interface was sufficient enough with bitmaps +remaining a separate field. + +See also: https://bugzilla.redhat.com/1779904 + +Reported-by: Nir Soffer +Signed-off-by: Eric Blake +Message-Id: <20200521192137.1120211-3-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5d72c68b49769c927e90b78af6d90f6a384b26ac) + +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + block/crypto.c - commit a9da6e49 not present (no measure support) + docs/tools/qemu-img.rst - changes in qemu-img.texi instead +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2-bitmap.c | 36 ++++++++++++++++++++++++++++++ + block/qcow2.c | 14 +++++++++--- + block/qcow2.h | 2 ++ + block/raw-format.c | 2 +- + qapi/block-core.json | 16 +++++++++----- + qemu-img.c | 3 +++ + qemu-img.texi | 7 ++++++ + tests/qemu-iotests/178.out.qcow2 | 16 ++++++++++++++ + tests/qemu-iotests/190 | 47 ++++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/190.out | 27 ++++++++++++++++++++++- + 10 files changed, 158 insertions(+), 12 deletions(-) + +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index cbac905..10d1297 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -1766,3 +1766,39 @@ bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) + + return s->qcow_version >= 3; + } ++ ++/* ++ * Compute the space required for bitmaps in @bs. ++ * ++ * The computation is based as if copying to a new image with the ++ * given @cluster_size, which may differ from the cluster size in @bs. 
++ */ ++uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, ++ uint32_t cluster_size) ++{ ++ uint64_t bitmaps_size = 0; ++ BdrvDirtyBitmap *bm; ++ size_t bitmap_dir_size = 0; ++ ++ FOR_EACH_DIRTY_BITMAP(bs, bm) { ++ if (bdrv_dirty_bitmap_get_persistence(bm)) { ++ const char *name = bdrv_dirty_bitmap_name(bm); ++ uint32_t granularity = bdrv_dirty_bitmap_granularity(bm); ++ uint64_t bmbytes = ++ get_bitmap_bytes_needed(bdrv_dirty_bitmap_size(bm), ++ granularity); ++ uint64_t bmclusters = DIV_ROUND_UP(bmbytes, cluster_size); ++ ++ /* Assume the entire bitmap is allocated */ ++ bitmaps_size += bmclusters * cluster_size; ++ /* Also reserve space for the bitmap table entries */ ++ bitmaps_size += ROUND_UP(bmclusters * sizeof(uint64_t), ++ cluster_size); ++ /* And space for contribution to bitmap directory size */ ++ bitmap_dir_size += calc_dir_entry_size(strlen(name), 0); ++ } ++ } ++ bitmaps_size += ROUND_UP(bitmap_dir_size, cluster_size); ++ ++ return bitmaps_size; ++} +diff --git a/block/qcow2.c b/block/qcow2.c +index 36b0f7d..dbd870a 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -4751,16 +4751,24 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + required = virtual_size; + } + +- info = g_new(BlockMeasureInfo, 1); ++ info = g_new0(BlockMeasureInfo, 1); + info->fully_allocated = + qcow2_calc_prealloc_size(virtual_size, cluster_size, + ctz32(refcount_bits)) + luks_payload_size; + +- /* Remove data clusters that are not required. This overestimates the ++ /* ++ * Remove data clusters that are not required. This overestimates the + * required size because metadata needed for the fully allocated file is +- * still counted. ++ * still counted. Show bitmaps only if both source and destination ++ * would support them. 
+ */ + info->required = info->fully_allocated - virtual_size + required; ++ info->has_bitmaps = version >= 3 && in_bs && ++ bdrv_supports_persistent_dirty_bitmap(in_bs); ++ if (info->has_bitmaps) { ++ info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs, ++ cluster_size); ++ } + return info; + + err: +diff --git a/block/qcow2.h b/block/qcow2.h +index ceb1ceb..3297e6b 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -768,6 +768,8 @@ int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); + bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); ++uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, ++ uint32_t cluster_size); + + ssize_t coroutine_fn + qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, +diff --git a/block/raw-format.c b/block/raw-format.c +index 93b25e1..4bb54f4 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -346,7 +346,7 @@ static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, + BDRV_SECTOR_SIZE); + } + +- info = g_new(BlockMeasureInfo, 1); ++ info = g_new0(BlockMeasureInfo, 1); + info->required = required; + + /* Unallocated sectors count towards the file size in raw images */ +diff --git a/qapi/block-core.json b/qapi/block-core.json +index a64ad81..2893209 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -689,18 +689,24 @@ + # efficiently so file size may be smaller than virtual disk size. + # + # The values are upper bounds that are guaranteed to fit the new image file. +-# Subsequent modification, such as internal snapshot or bitmap creation, may +-# require additional space and is not covered here. ++# Subsequent modification, such as internal snapshot or further bitmap ++# creation, may require additional space and is not covered here. + # +-# @required: Size required for a new image file, in bytes. 
++# @required: Size required for a new image file, in bytes, when copying just ++# allocated guest-visible contents. + # + # @fully-allocated: Image file size, in bytes, once data has been written +-# to all sectors. ++# to all sectors, when copying just guest-visible contents. ++# ++# @bitmaps: Additional size required if all the top-level bitmap metadata ++# in the source image were to be copied to the destination, ++# present only when source and destination both support ++# persistent bitmaps. (since 5.1) + # + # Since: 2.10 + ## + { 'struct': 'BlockMeasureInfo', +- 'data': {'required': 'int', 'fully-allocated': 'int'} } ++ 'data': {'required': 'int', 'fully-allocated': 'int', '*bitmaps': 'int'} } + + ## + # @query-block: +diff --git a/qemu-img.c b/qemu-img.c +index 11a4537..b57856e 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -5212,6 +5212,9 @@ static int img_measure(int argc, char **argv) + if (output_format == OFORMAT_HUMAN) { + printf("required size: %" PRIu64 "\n", info->required); + printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated); ++ if (info->has_bitmaps) { ++ printf("bitmaps size: %" PRIu64 "\n", info->bitmaps); ++ } + } else { + dump_json_block_measure_info(info); + } +diff --git a/qemu-img.texi b/qemu-img.texi +index abf2771..3670b96 100644 +--- a/qemu-img.texi ++++ b/qemu-img.texi +@@ -576,6 +576,7 @@ The following fields are reported: + @example + required size: 524288 + fully allocated size: 1074069504 ++bitmaps size: 0 + @end example + + The @code{required size} is the file size of the new image. It may be smaller +@@ -586,6 +587,12 @@ been written to all sectors. This is the maximum size that the image file can + occupy with the exception of internal snapshots, dirty bitmaps, vmstate data, + and other advanced image format features. 
+ ++The @code{bitmaps size} is the additional size required in order to ++copy bitmaps from a source image in addition to the guest-visible ++data; the line is omitted if either source or destination lacks ++bitmap support, or 0 if bitmaps are supported but there is nothing to ++copy. ++ + @item snapshot [--object @var{objectdef}] [--image-opts] [-U] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} + + List, apply, create or delete snapshots in image @var{filename}. +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index 345eab3..b9ed41b 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -37,6 +37,7 @@ qemu-img: The image size is too large (try using a larger cluster size) + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 + required size: 196608 + fully allocated size: 196608 ++bitmaps size: 0 + + converted image file size in bytes: 196608 + +@@ -45,6 +46,7 @@ converted image file size in bytes: 196608 + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + required size: 393216 + fully allocated size: 1074135040 ++bitmaps size: 0 + wrote 512/512 bytes at offset 512 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 65536/65536 bytes at offset 65536 +@@ -53,6 +55,7 @@ wrote 64512/64512 bytes at offset 134217728 + 63 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + required size: 589824 + fully allocated size: 1074135040 ++bitmaps size: 0 + + converted image file size in bytes: 524288 + +@@ -60,6 +63,7 @@ converted image file size in bytes: 524288 + + required size: 524288 + fully allocated size: 1074135040 ++bitmaps size: 0 + + converted image file size in bytes: 458752 + +@@ -67,16 +71,19 @@ converted image file size in bytes: 458752 + + required size: 1074135040 + fully allocated size: 1074135040 ++bitmaps size: 0 + + == qcow2 input image and LUKS encryption == + + required size: 2686976 + fully allocated size: 1076232192 
++bitmaps size: 0 + + == qcow2 input image and preallocation (human) == + + required size: 1074135040 + fully allocated size: 1074135040 ++bitmaps size: 0 + + converted image file size in bytes: 1074135040 + +@@ -87,6 +94,7 @@ wrote 8388608/8388608 bytes at offset 0 + 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + required size: 8716288 + fully allocated size: 8716288 ++bitmaps size: 0 + + converted image file size in bytes: 8716288 + +@@ -173,6 +181,7 @@ qemu-img: The image size is too large (try using a larger cluster size) + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 + { ++ "bitmaps": 0, + "required": 196608, + "fully-allocated": 196608 + } +@@ -183,6 +192,7 @@ converted image file size in bytes: 196608 + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + { ++ "bitmaps": 0, + "required": 393216, + "fully-allocated": 1074135040 + } +@@ -193,6 +203,7 @@ wrote 65536/65536 bytes at offset 65536 + wrote 64512/64512 bytes at offset 134217728 + 63 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { ++ "bitmaps": 0, + "required": 589824, + "fully-allocated": 1074135040 + } +@@ -202,6 +213,7 @@ converted image file size in bytes: 524288 + == qcow2 input image with internal snapshot (json) == + + { ++ "bitmaps": 0, + "required": 524288, + "fully-allocated": 1074135040 + } +@@ -211,6 +223,7 @@ converted image file size in bytes: 458752 + == qcow2 input image and a backing file (json) == + + { ++ "bitmaps": 0, + "required": 1074135040, + "fully-allocated": 1074135040 + } +@@ -218,6 +231,7 @@ converted image file size in bytes: 458752 + == qcow2 input image and LUKS encryption == + + { ++ "bitmaps": 0, + "required": 2686976, + "fully-allocated": 1076232192 + } +@@ -225,6 +239,7 @@ converted image file size in bytes: 458752 + == qcow2 input image and preallocation (json) == + + { ++ "bitmaps": 0, + "required": 1074135040, + "fully-allocated": 1074135040 + } +@@ -237,6 +252,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608 + wrote 
8388608/8388608 bytes at offset 0 + 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { ++ "bitmaps": 0, + "required": 8716288, + "fully-allocated": 8716288 + } +diff --git a/tests/qemu-iotests/190 b/tests/qemu-iotests/190 +index eb766ad..5084ccd 100755 +--- a/tests/qemu-iotests/190 ++++ b/tests/qemu-iotests/190 +@@ -2,7 +2,7 @@ + # + # qemu-img measure sub-command tests on huge qcow2 files + # +-# Copyright (C) 2017 Red Hat, Inc. ++# Copyright (C) 2017-2020 Red Hat, Inc. + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -42,7 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 + _supported_fmt qcow2 + _supported_proto file + +-echo "== Huge file ==" ++echo "== Huge file without bitmaps ==" + echo + + IMGOPTS='cluster_size=2M' _make_test_img 2T +@@ -51,6 +51,49 @@ $QEMU_IMG measure -O raw -f qcow2 "$TEST_IMG" + $QEMU_IMG measure -O qcow2 -o cluster_size=64k -f qcow2 "$TEST_IMG" + $QEMU_IMG measure -O qcow2 -o cluster_size=2M -f qcow2 "$TEST_IMG" + ++echo ++echo "== Huge file with bitmaps ==" ++echo ++ ++$QEMU_IMG bitmap --add --granularity 512 -f qcow2 "$TEST_IMG" b1 ++$QEMU_IMG bitmap --add -g 2M -f qcow2 "$TEST_IMG" b2 ++ ++# No bitmap without a source ++$QEMU_IMG measure -O qcow2 --size 10M ++# No bitmap output, since raw does not support it ++$QEMU_IMG measure -O raw -f qcow2 "$TEST_IMG" ++# No bitmap output, since no bitmaps on raw source. 
Munge required size, as ++# some filesystems store the qcow2 file with less sparseness than others ++$QEMU_IMG measure -O qcow2 -f raw "$TEST_IMG" | ++ sed '/^required size:/ s/[0-9][0-9]*/SIZE/' ++# No bitmap output, since v2 does not support it ++$QEMU_IMG measure -O qcow2 -o compat=0.10 -f qcow2 "$TEST_IMG" ++ ++# Compute expected output: bitmap clusters + bitmap tables + bitmaps directory ++echo ++val2T=$((2*1024*1024*1024*1024)) ++cluster=$((64*1024)) ++b1clusters=$(( (val2T/512/8 + cluster - 1) / cluster )) ++b2clusters=$(( (val2T/2/1024/1024/8 + cluster - 1) / cluster )) ++echo expected bitmap $((b1clusters * cluster + ++ (b1clusters * 8 + cluster - 1) / cluster * cluster + ++ b2clusters * cluster + ++ (b2clusters * 8 + cluster - 1) / cluster * cluster + ++ cluster)) ++$QEMU_IMG measure -O qcow2 -o cluster_size=64k -f qcow2 "$TEST_IMG" ++ ++# Compute expected output: bitmap clusters + bitmap tables + bitmaps directory ++echo ++cluster=$((2*1024*1024)) ++b1clusters=$(( (val2T/512/8 + cluster - 1) / cluster )) ++b2clusters=$(( (val2T/2/1024/1024/8 + cluster - 1) / cluster )) ++echo expected bitmap $((b1clusters * cluster + ++ (b1clusters * 8 + cluster - 1) / cluster * cluster + ++ b2clusters * cluster + ++ (b2clusters * 8 + cluster - 1) / cluster * cluster + ++ cluster)) ++$QEMU_IMG measure --output=json -O qcow2 -o cluster_size=2M -f qcow2 "$TEST_IMG" ++ + # success, all done + echo "*** done" + rm -f $seq.full +diff --git a/tests/qemu-iotests/190.out b/tests/qemu-iotests/190.out +index d001942..ed9d821 100644 +--- a/tests/qemu-iotests/190.out ++++ b/tests/qemu-iotests/190.out +@@ -1,11 +1,36 @@ + QA output created by 190 +-== Huge file == ++== Huge file without bitmaps == + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2199023255552 + required size: 2199023255552 + fully allocated size: 2199023255552 + required size: 335806464 + fully allocated size: 2199359062016 ++bitmaps size: 0 + required size: 18874368 + fully allocated size: 2199042129920 ++bitmaps 
size: 0 ++ ++== Huge file with bitmaps == ++ ++required size: 327680 ++fully allocated size: 10813440 ++required size: 2199023255552 ++fully allocated size: 2199023255552 ++required size: SIZE ++fully allocated size: 17170432 ++required size: 335806464 ++fully allocated size: 2199359062016 ++ ++expected bitmap 537198592 ++required size: 335806464 ++fully allocated size: 2199359062016 ++bitmaps size: 537198592 ++ ++expected bitmap 545259520 ++{ ++ "bitmaps": 545259520, ++ "required": 18874368, ++ "fully-allocated": 2199042129920 ++} + *** done +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch b/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch new file mode 100644 index 0000000..43ff282 --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch @@ -0,0 +1,47 @@ +From bd97bbbce54da301407d51cae35e09ba2a12b160 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 13 Jul 2020 14:24:48 -0400 +Subject: [PATCH 1/4] qcow2: Fix alloc_cluster_abort() for pre-existing + clusters + +RH-Author: Max Reitz +Message-id: <20200713142451.289703-2-mreitz@redhat.com> +Patchwork-id: 97954 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/4] qcow2: Fix alloc_cluster_abort() for pre-existing clusters +Bugzilla: 1807057 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +handle_alloc() reuses preallocated zero clusters. If anything goes +wrong during the data write, we do not change their L2 entry, so we +must not let qcow2_alloc_cluster_abort() free them. + +Fixes: 8b24cd141549b5b264baeddd4e72902cfb5de23b +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-Id: <20200225143130.111267-2-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ede935fdbbd5f7b24b4724bbfb8938acb5956d8) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2-cluster.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index 9d04f8d77b..1970797ce5 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1015,7 +1015,7 @@ err: + void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) + { + BDRVQcow2State *s = bs->opaque; +- if (!has_data_file(bs)) { ++ if (!has_data_file(bs) && !m->keep_old_clusters) { + qcow2_free_clusters(bs, m->alloc_offset, + m->nb_clusters << s->cluster_bits, + QCOW2_DISCARD_NEVER); +-- +2.27.0 + diff --git a/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch new file mode 100644 index 0000000..1a7ace5 --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch @@ -0,0 +1,52 @@ +From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:13:07 +0000 +Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data + file + +RH-Author: Kevin Wolf +Message-id: <20200224161307.29783-2-kwolf@redhat.com> +Patchwork-id: 94042 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file +Bugzilla: 1703907 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +For external data file, cluster allocations return an offset in the data +file and are not refcounted. In this case, there is nothing to do for +qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file +is wrong and causes crashes in the better case or image corruption in +the worse case. + +Signed-off-by: Kevin Wolf +Message-Id: <20200211094900.17315-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2-cluster.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index 8982b7b..dc3c270 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1015,8 +1015,11 @@ err: + void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) + { + BDRVQcow2State *s = bs->opaque; +- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, +- QCOW2_DISCARD_NEVER); ++ if (!has_data_file(bs)) { ++ qcow2_free_clusters(bs, m->alloc_offset, ++ m->nb_clusters << s->cluster_bits, ++ QCOW2_DISCARD_NEVER); ++ } + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch b/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch new file mode 100644 index 0000000..522ba60 --- /dev/null +++ b/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch @@ -0,0 +1,98 @@ +From 4290173219e15065e9a7c2e95774ac979b5fd869 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:40 +0100 +Subject: [PATCH 12/17] qcow2: Forward ZERO_WRITE flag for full preallocation + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-12-kwolf@redhat.com> +Patchwork-id: 97456 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 11/11] qcow2: Forward ZERO_WRITE flag for full preallocation +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the +image is possibly preallocated and then the zero flag is added to all +clusters. This means that a copy-on-write operation may be needed when +writing to these clusters, despite having used preallocation, negating +one of the major benefits of preallocation. 
+ +Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver, +and if the protocol driver can ensure that the new area reads as zeros, +we can skip setting the zero flag in the qcow2 layer. + +Unfortunately, the same approach doesn't work for metadata +preallocation, so we'll still set the zero flag there. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +Message-Id: <20200424142701.67053-1-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit eb8a0cf3ba26611f3981f8f45ac6a868975a68cc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2.c | 22 +++++++++++++++++++--- + tests/qemu-iotests/274.out | 4 ++-- + 2 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index f3d6cb0..b783662 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -4153,9 +4153,25 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + /* Allocate the data area */ + new_file_size = allocation_start + + nb_new_data_clusters * s->cluster_size; +- /* Image file grows, so @exact does not matter */ +- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, +- errp); ++ /* ++ * Image file grows, so @exact does not matter. ++ * ++ * If we need to zero out the new area, try first whether the protocol ++ * driver can already take care of this. 
++ */ ++ if (flags & BDRV_REQ_ZERO_WRITE) { ++ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, ++ BDRV_REQ_ZERO_WRITE, NULL); ++ if (ret >= 0) { ++ flags &= ~BDRV_REQ_ZERO_WRITE; ++ } ++ } else { ++ ret = -1; ++ } ++ if (ret < 0) { ++ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, ++ errp); ++ } + if (ret < 0) { + error_prepend(errp, "Failed to resize underlying file: "); + qcow2_free_clusters(bs, allocation_start, +diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out +index 1a796fd..9d6fdeb 100644 +--- a/tests/qemu-iotests/274.out ++++ b/tests/qemu-iotests/274.out +@@ -187,7 +187,7 @@ read 65536/65536 bytes at offset 9437184 + 10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) + + [{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, +-{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] ++{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] + + === preallocation=full === + Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 +@@ -206,7 +206,7 @@ read 65536/65536 bytes at offset 11534336 + 4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) + + [{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, +-{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] ++{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] + + === preallocation=off === + Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch new file mode 100644 index 0000000..454759e --- /dev/null +++ 
b/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch @@ -0,0 +1,101 @@ +From 3e603e344b81b3ecfea6fb9589ba91f70a22139d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:33 +0100 +Subject: [PATCH 05/17] qcow2: Support BDRV_REQ_ZERO_WRITE for truncate + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-5-kwolf@redhat.com> +Patchwork-id: 97449 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 04/11] qcow2: Support BDRV_REQ_ZERO_WRITE for truncate +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling +qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't +undo any previous preallocation, but just adds the zero flag to all +relevant L2 entries. If an external data file is in use, a write_zeroes +request to the data file is made instead. + +Signed-off-by: Kevin Wolf +Message-Id: <20200424125448.63318-5-kwolf@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit f01643fb8b47e8a70c04bbf45e0f12a9e5bc54de) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2-cluster.c | 2 +- + block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index dc3c270..9d04f8d 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1784,7 +1784,7 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, + /* Caller must pass aligned values, except at image end */ + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || +- end_offset == bs->total_sectors << BDRV_SECTOR_BITS); ++ end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); + + /* The zero flag is only supported by version 3 and newer */ + if (s->qcow_version < 3) { +diff --git a/block/qcow2.c b/block/qcow2.c +index 86aa74a..f3d6cb0 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1726,6 +1726,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + } + + bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0; ++ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + + /* Repair image if dirty */ + if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && +@@ -4197,6 +4198,39 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + g_assert_not_reached(); + } + ++ if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { ++ uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); ++ ++ /* ++ * Use zero clusters as much as we can. qcow2_cluster_zeroize() ++ * requires a cluster-aligned start. The end may be unaligned if it is ++ * at the end of the image (which it is here). 
++ */ ++ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "Failed to zero out new clusters"); ++ goto fail; ++ } ++ ++ /* Write explicit zeros for the unaligned head */ ++ if (zero_start > old_length) { ++ uint64_t len = zero_start - old_length; ++ uint8_t *buf = qemu_blockalign0(bs, len); ++ QEMUIOVector qiov; ++ qemu_iovec_init_buf(&qiov, buf, len); ++ ++ qemu_co_mutex_unlock(&s->lock); ++ ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); ++ qemu_co_mutex_lock(&s->lock); ++ ++ qemu_vfree(buf); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "Failed to zero out the new area"); ++ goto fail; ++ } ++ } ++ } ++ + if (prealloc != PREALLOC_MODE_OFF) { + /* Flush metadata before actually changing the image size */ + ret = qcow2_write_caches(bs); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch new file mode 100644 index 0000000..88a6e31 --- /dev/null +++ b/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch @@ -0,0 +1,92 @@ +From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:39 +0000 +Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-7-quintela@redhat.com> +Patchwork-id: 94116 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +Be sure that we are not doing neither read/write after shutdown of the +QEMUFile. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/qemu-file.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 26fb25d..bbb2b63 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -53,6 +53,8 @@ struct QEMUFile { + + int last_error; + Error *last_error_obj; ++ /* has the file has been shutdown */ ++ bool shutdown; + }; + + /* +@@ -61,10 +63,18 @@ struct QEMUFile { + */ + int qemu_file_shutdown(QEMUFile *f) + { ++ int ret; ++ ++ f->shutdown = true; + if (!f->ops->shut_down) { + return -ENOSYS; + } +- return f->ops->shut_down(f->opaque, true, true, NULL); ++ ret = f->ops->shut_down(f->opaque, true, true, NULL); ++ ++ if (!f->last_error) { ++ qemu_file_set_error(f, -EIO); ++ } ++ return ret; + } + + /* +@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) + return; + } + ++ if (f->shutdown) { ++ return; ++ } + if (f->iovcnt > 0) { + expect = iov_size(f->iov, f->iovcnt); + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, +@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) + f->buf_index = 0; + f->buf_size = pending; + ++ if (f->shutdown) { ++ return 0; ++ } ++ + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, + IO_BUF_SIZE - pending, &local_error); + if (len > 0) { +@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) + + int qemu_file_rate_limit(QEMUFile *f) + { ++ if (f->shutdown) { ++ return 1; ++ } + if (qemu_file_get_error(f)) { + return 1; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch b/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch new file mode 100644 index 0000000..eb80188 --- /dev/null +++ b/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch @@ -0,0 +1,398 @@ +From 53baacb72e8561391841363b2acbd85a783cbc66 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:15 +0100 +Subject: [PATCH 10/26] qemu-img: Add bitmap sub-command + +RH-Author: Eric Blake +Message-id: 
<20200602023420.2133649-8-eblake@redhat.com> +Patchwork-id: 97074 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 07/12] qemu-img: Add bitmap sub-command +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Include actions for --add, --remove, --clear, --enable, --disable, and +--merge (note that --clear is a bit of fluff, because the same can be +accomplished by removing a bitmap and then adding a new one in its +place, but it matches what QMP commands exist). Listing is omitted, +because it does not require a bitmap name and because it was already +possible with 'qemu-img info'. A single command line can play one or +more bitmap commands in sequence on the same bitmap name (although all +added bitmaps share the same granularity, and all merged bitmaps +come from the same source file). Merge defaults to other bitmaps in +the primary image, but can also be told to merge bitmaps from a +distinct image. + +While this supports --image-opts for the file being modified, I did +not think it worth the extra complexity to support that for the source +file in cross-file merges. Likewise, I chose to have --merge only +take a single source rather than following the QMP support for +multiple merges in one go (although you can still use more than one +--merge in the command line); in part because qemu-img is offline and +therefore atomicity is not an issue. + +Upcoming patches will add iotest coverage of these commands while +also testing other features. + +Signed-off-by: Eric Blake +Reviewed-by: Max Reitz +Message-Id: <20200513011648.166876-7-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3b51ab4bf0f49a01cc2db7b954e0669e081719b5) + +Signed-off-by: Danilo C. L.
de Paula + +Conflicts: + docs/tools/qemu-img.rst - lives in qemu-img.texi instead; plus + fix a typo in the text for --merge rather than waiting for + a one-line upstream followup patch + qemu-img-cmds.hx - context, use texi instead of rst + qemu-img.c - context +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. de Paula +--- + qemu-img-cmds.hx | 6 ++ + qemu-img.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + qemu-img.texi | 27 ++++++ + 3 files changed, 281 insertions(+) + +diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx +index 1c93e6d..1a6a8e9 100644 +--- a/qemu-img-cmds.hx ++++ b/qemu-img-cmds.hx +@@ -25,6 +25,12 @@ STEXI + @item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} + ETEXI + ++DEF("bitmap", img_bitmap, ++ "bitmap (--merge SOURCE | --add | --remove | --clear | --enable | --disable)... [-b source_file [-F source_fmt]] [-g granularity] [--object objectdef] [--image-opts | -f fmt] filename bitmap") ++STEXI ++.. option:: bitmap (--merge @var{source} | --add | --remove | --clear | --enable | --disable)... 
[-b @var{source_file} [-F @var{source_fmt}]] [-g @var{granularity}] [--object @var{objectdef}] [--image-opts | -f @var{fmt}] @var{filename} @var{bitmap} ++ETEXI ++ + DEF("check", img_check, + "check [--object objectdef] [--image-opts] [-q] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename") + STEXI +diff --git a/qemu-img.c b/qemu-img.c +index e69529b..11a4537 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -28,6 +28,7 @@ + #include "qemu-common.h" + #include "qemu-version.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-block-core.h" + #include "qapi/qapi-visit-block-core.h" + #include "qapi/qobject-output-visitor.h" + #include "qapi/qmp/qjson.h" +@@ -70,6 +71,12 @@ enum { + OPTION_PREALLOCATION = 265, + OPTION_SHRINK = 266, + OPTION_SALVAGE = 267, ++ OPTION_ADD = 269, ++ OPTION_REMOVE = 270, ++ OPTION_CLEAR = 271, ++ OPTION_ENABLE = 272, ++ OPTION_DISABLE = 273, ++ OPTION_MERGE = 274, + }; + + typedef enum OutputFormat { +@@ -168,6 +175,14 @@ static void QEMU_NORETURN help(void) + " '-n' skips the target volume creation (useful if the volume is created\n" + " prior to running qemu-img)\n" + "\n" ++ "Parameters to bitmap subcommand:\n" ++ " 'bitmap' is the name of the bitmap to manipulate, through one or more\n" ++ " actions from '--add', '--remove', '--clear', '--enable', '--disable',\n" ++ " or '--merge source'\n" ++ " '-g granularity' sets the granularity for '--add' actions\n" ++ " '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n" ++ " bitmaps from an alternative file\n" ++ "\n" + "Parameters to check subcommand:\n" + " '-r' tries to repair any inconsistencies that are found during the check.\n" + " '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n" +@@ -4402,6 +4417,239 @@ out: + return 0; + } + ++enum ImgBitmapAct { ++ BITMAP_ADD, ++ BITMAP_REMOVE, ++ BITMAP_CLEAR, ++ BITMAP_ENABLE, ++ BITMAP_DISABLE, ++ BITMAP_MERGE, ++}; ++typedef struct ImgBitmapAction { ++ enum ImgBitmapAct act; ++ 
const char *src; /* only used for merge */ ++ QSIMPLEQ_ENTRY(ImgBitmapAction) next; ++} ImgBitmapAction; ++ ++static int img_bitmap(int argc, char **argv) ++{ ++ Error *err = NULL; ++ int c, ret = 1; ++ QemuOpts *opts = NULL; ++ const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL; ++ const char *filename, *bitmap; ++ BlockBackend *blk = NULL, *src = NULL; ++ BlockDriverState *bs = NULL, *src_bs = NULL; ++ bool image_opts = false; ++ int64_t granularity = 0; ++ bool add = false, merge = false; ++ QSIMPLEQ_HEAD(, ImgBitmapAction) actions; ++ ImgBitmapAction *act, *act_next; ++ const char *op; ++ ++ QSIMPLEQ_INIT(&actions); ++ ++ for (;;) { ++ static const struct option long_options[] = { ++ {"help", no_argument, 0, 'h'}, ++ {"object", required_argument, 0, OPTION_OBJECT}, ++ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, ++ {"add", no_argument, 0, OPTION_ADD}, ++ {"remove", no_argument, 0, OPTION_REMOVE}, ++ {"clear", no_argument, 0, OPTION_CLEAR}, ++ {"enable", no_argument, 0, OPTION_ENABLE}, ++ {"disable", no_argument, 0, OPTION_DISABLE}, ++ {"merge", required_argument, 0, OPTION_MERGE}, ++ {"granularity", required_argument, 0, 'g'}, ++ {"source-file", required_argument, 0, 'b'}, ++ {"source-format", required_argument, 0, 'F'}, ++ {0, 0, 0, 0} ++ }; ++ c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL); ++ if (c == -1) { ++ break; ++ } ++ ++ switch (c) { ++ case ':': ++ missing_argument(argv[optind - 1]); ++ break; ++ case '?': ++ unrecognized_option(argv[optind - 1]); ++ break; ++ case 'h': ++ help(); ++ break; ++ case 'b': ++ src_filename = optarg; ++ break; ++ case 'f': ++ fmt = optarg; ++ break; ++ case 'F': ++ src_fmt = optarg; ++ break; ++ case 'g': ++ granularity = cvtnum("granularity", optarg); ++ if (granularity < 0) { ++ return 1; ++ } ++ break; ++ case OPTION_ADD: ++ act = g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_ADD; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ add = true; ++ break; ++ case OPTION_REMOVE: ++ act = 
g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_REMOVE; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ break; ++ case OPTION_CLEAR: ++ act = g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_CLEAR; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ break; ++ case OPTION_ENABLE: ++ act = g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_ENABLE; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ break; ++ case OPTION_DISABLE: ++ act = g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_DISABLE; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ break; ++ case OPTION_MERGE: ++ act = g_new0(ImgBitmapAction, 1); ++ act->act = BITMAP_MERGE; ++ act->src = optarg; ++ QSIMPLEQ_INSERT_TAIL(&actions, act, next); ++ merge = true; ++ break; ++ case OPTION_OBJECT: ++ opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); ++ if (!opts) { ++ goto out; ++ } ++ break; ++ case OPTION_IMAGE_OPTS: ++ image_opts = true; ++ break; ++ } ++ } ++ ++ if (qemu_opts_foreach(&qemu_object_opts, ++ user_creatable_add_opts_foreach, ++ qemu_img_object_print_help, &error_fatal)) { ++ goto out; ++ } ++ ++ if (QSIMPLEQ_EMPTY(&actions)) { ++ error_report("Need at least one of --add, --remove, --clear, " ++ "--enable, --disable, or --merge"); ++ goto out; ++ } ++ ++ if (granularity && !add) { ++ error_report("granularity only supported with --add"); ++ goto out; ++ } ++ if (src_fmt && !src_filename) { ++ error_report("-F only supported with -b"); ++ goto out; ++ } ++ if (src_filename && !merge) { ++ error_report("Merge bitmap source file only supported with " ++ "--merge"); ++ goto out; ++ } ++ ++ if (optind != argc - 2) { ++ error_report("Expecting filename and bitmap name"); ++ goto out; ++ } ++ ++ filename = argv[optind]; ++ bitmap = argv[optind + 1]; ++ ++ blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR, false, false, ++ false); ++ if (!blk) { ++ goto out; ++ } ++ bs = blk_bs(blk); ++ if (src_filename) { ++ src = img_open(false, src_filename, src_fmt, 0, false, false, false); ++ if (!src) { 
++ goto out; ++ } ++ src_bs = blk_bs(src); ++ } else { ++ src_bs = bs; ++ } ++ ++ QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) { ++ switch (act->act) { ++ case BITMAP_ADD: ++ qmp_block_dirty_bitmap_add(bs->node_name, bitmap, ++ !!granularity, granularity, true, true, ++ false, false, &err); ++ op = "add"; ++ break; ++ case BITMAP_REMOVE: ++ qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err); ++ op = "remove"; ++ break; ++ case BITMAP_CLEAR: ++ qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err); ++ op = "clear"; ++ break; ++ case BITMAP_ENABLE: ++ qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err); ++ op = "enable"; ++ break; ++ case BITMAP_DISABLE: ++ qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err); ++ op = "disable"; ++ break; ++ case BITMAP_MERGE: { ++ BlockDirtyBitmapMergeSource *merge_src; ++ BlockDirtyBitmapMergeSourceList *list; ++ ++ merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); ++ merge_src->type = QTYPE_QDICT; ++ merge_src->u.external.node = g_strdup(src_bs->node_name); ++ merge_src->u.external.name = g_strdup(act->src); ++ list = g_new0(BlockDirtyBitmapMergeSourceList, 1); ++ list->value = merge_src; ++ qmp_block_dirty_bitmap_merge(bs->node_name, bitmap, list, &err); ++ qapi_free_BlockDirtyBitmapMergeSourceList(list); ++ op = "merge"; ++ break; ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (err) { ++ error_reportf_err(err, "Operation %s on bitmap %s failed: ", ++ op, bitmap); ++ goto out; ++ } ++ g_free(act); ++ } ++ ++ ret = 0; ++ ++ out: ++ blk_unref(src); ++ blk_unref(blk); ++ qemu_opts_del(opts); ++ return ret; ++} ++ + #define C_BS 01 + #define C_COUNT 02 + #define C_IF 04 +diff --git a/qemu-img.texi b/qemu-img.texi +index b5156d6..abf2771 100644 +--- a/qemu-img.texi ++++ b/qemu-img.texi +@@ -230,6 +230,33 @@ specified as well. + For write tests, by default a buffer filled with zeros is written. This can be + overridden with a pattern byte specified by @var{pattern}. 
+ ++@item bitmap (--merge @var{source} | --add | --remove | --clear | --enable | --disable)... [-b @var{source_file} [-F @var{source_fmt}]] [-g @var{granularity}] [--object @var{objectdef}] [--image-opts | -f @var{fmt}] @var{filename} @var{bitmap} ++ ++Perform one or more modifications of the persistent bitmap @var{bitmap} ++in the disk image @var{filename}. The various modifications are: ++ ++@table @option ++@item add ++create @var{bitmap}, enabled to record future edits. ++@item remove ++remove @var{bitmap}. ++@item clear ++clear @var{bitmap}. ++@item enable ++change @var{bitmap} to start recording future edits. ++@item disable ++change @var{bitmap} to stop recording future edits. ++@item merge @var{source} ++merge the contents of the @var{source} bitmap into @var{bitmap}. ++@end table ++ ++Additional options include @option{-g} which sets a non-default ++@var{granularity} for @option{--add}, and @option{-b} and @option{-F} ++which select an alternative source file for all @var{source} bitmaps used by ++@option{--merge}. ++ ++To see what bitmaps are present in an image, use @code{qemu-img info}. ++ + @item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] [-U] @var{filename} + + Perform a consistency check on the disk image @var{filename}. 
The command can +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch b/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch new file mode 100644 index 0000000..20eca9f --- /dev/null +++ b/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch @@ -0,0 +1,244 @@ +From f2add7d5955770318824c3eee774bec2dd850936 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:19 +0100 +Subject: [PATCH 14/26] qemu-img: Add convert --bitmaps option + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-12-eblake@redhat.com> +Patchwork-id: 97076 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 11/12] qemu-img: Add convert --bitmaps option +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Make it easier to copy all the persistent bitmaps of (the top layer +of) a source image along with its guest-visible contents, by adding a +boolean flag for use with qemu-img convert. This is basically +shorthand, as the same effect could be accomplished with a series of +'qemu-img bitmap --add' and 'qemu-img bitmap --merge -b source' +commands, or by their corresponding QMP commands. + +Note that this command will fail in the same scenarios where 'qemu-img +measure' omits a 'bitmaps size:' line, namely, when either the source +or the destination lacks persistent bitmap support altogether. + +See also https://bugzilla.redhat.com/show_bug.cgi?id=1779893 + +While touching this, clean up a couple coding issues spotted in the +same function: an extra blank line, and merging back-to-back 'if +(!skip_create)' blocks. + +Signed-off-by: Eric Blake +Message-Id: <20200521192137.1120211-5-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 15e39ad95078d528dfb9a75417453cab60332b77) + +Signed-off-by: Danilo C. L. 
de Paula + +Conflicts: + docs/tools/qemu-img.rst - qemu-img.texi instead + qemu-img.c - context: no --target-is-zero + qemu-img-cmds.hx - context: texi instead of rst +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. de Paula +--- + qemu-img-cmds.hx | 4 ++-- + qemu-img.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- + qemu-img.texi | 4 +++- + 3 files changed, 72 insertions(+), 6 deletions(-) + +diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx +index 1a6a8e9..48144aa 100644 +--- a/qemu-img-cmds.hx ++++ b/qemu-img-cmds.hx +@@ -50,9 +50,9 @@ STEXI + ETEXI + + DEF("convert", img_convert, +- "convert [--object objectdef] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") ++ "convert [--object objectdef] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") + STEXI +-@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] [--salvage] @var{filename} [@var{filename2} [...]] @var{output_filename} ++@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] [--salvage] @var{filename} [@var{filename2} [...]] 
@var{output_filename} + ETEXI + + DEF("create", img_create, +diff --git a/qemu-img.c b/qemu-img.c +index 39e1586..6dc881b 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -77,6 +77,7 @@ enum { + OPTION_ENABLE = 272, + OPTION_DISABLE = 273, + OPTION_MERGE = 274, ++ OPTION_BITMAPS = 275, + }; + + typedef enum OutputFormat { +@@ -190,6 +191,7 @@ static void QEMU_NORETURN help(void) + " hiding corruption that has already occurred.\n" + "\n" + "Parameters to convert subcommand:\n" ++ " '--bitmaps' copies all top-level persistent bitmaps to destination\n" + " '-m' specifies how many coroutines work in parallel during the convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" +@@ -2084,6 +2086,39 @@ static int convert_do_copy(ImgConvertState *s) + return s->ret; + } + ++static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) ++{ ++ BdrvDirtyBitmap *bm; ++ Error *err = NULL; ++ ++ FOR_EACH_DIRTY_BITMAP(src, bm) { ++ const char *name; ++ ++ if (!bdrv_dirty_bitmap_get_persistence(bm)) { ++ continue; ++ } ++ name = bdrv_dirty_bitmap_name(bm); ++ qmp_block_dirty_bitmap_add(dst->node_name, name, ++ true, bdrv_dirty_bitmap_granularity(bm), ++ true, true, ++ true, !bdrv_dirty_bitmap_enabled(bm), ++ &err); ++ if (err) { ++ error_reportf_err(err, "Failed to create bitmap %s: ", name); ++ return -1; ++ } ++ ++ do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name, ++ &err); ++ if (err) { ++ error_reportf_err(err, "Failed to populate bitmap %s: ", name); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ + #define MAX_BUF_SECTORS 32768 + + static int img_convert(int argc, char **argv) +@@ -2105,6 +2140,7 @@ static int img_convert(int argc, char **argv) + int64_t ret = -EINVAL; + bool force_share = false; + bool explict_min_sparse = false; ++ bool bitmaps = false; + + ImgConvertState s = (ImgConvertState) { + /* Need at least 4k of zeros for sparse detection */ +@@ -2123,6 +2159,7 @@ static int 
img_convert(int argc, char **argv) + {"force-share", no_argument, 0, 'U'}, + {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS}, + {"salvage", no_argument, 0, OPTION_SALVAGE}, ++ {"bitmaps", no_argument, 0, OPTION_BITMAPS}, + {0, 0, 0, 0} + }; + c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU", +@@ -2248,6 +2285,9 @@ static int img_convert(int argc, char **argv) + case OPTION_TARGET_IMAGE_OPTS: + tgt_image_opts = true; + break; ++ case OPTION_BITMAPS: ++ bitmaps = true; ++ break; + } + } + +@@ -2304,7 +2344,6 @@ static int img_convert(int argc, char **argv) + goto fail_getopt; + } + +- + /* ret is still -EINVAL until here */ + ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough); + if (ret < 0) { +@@ -2458,6 +2497,20 @@ static int img_convert(int argc, char **argv) + } + } + ++ /* Determine if bitmaps need copying */ ++ if (bitmaps) { ++ if (s.src_num > 1) { ++ error_report("Copying bitmaps only possible with single source"); ++ ret = -1; ++ goto out; ++ } ++ if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) { ++ error_report("Source lacks bitmap support"); ++ ret = -1; ++ goto out; ++ } ++ } ++ + /* + * The later open call will need any decryption secrets, and + * bdrv_create() will purge "opts", so extract them now before +@@ -2466,9 +2519,7 @@ static int img_convert(int argc, char **argv) + if (!skip_create) { + open_opts = qdict_new(); + qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort); +- } + +- if (!skip_create) { + /* Create the new image */ + ret = bdrv_create(drv, out_filename, opts, &local_err); + if (ret < 0) { +@@ -2506,6 +2557,13 @@ static int img_convert(int argc, char **argv) + } + out_bs = blk_bs(s.target); + ++ if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) { ++ error_report("Format driver '%s' does not support bitmaps", ++ out_bs->drv->format_name); ++ ret = -1; ++ goto out; ++ } ++ + if (s.compressed && !block_driver_can_compress(out_bs->drv)) { + 
error_report("Compression not supported for this file format"); + ret = -1; +@@ -2565,6 +2623,12 @@ static int img_convert(int argc, char **argv) + } + + ret = convert_do_copy(&s); ++ ++ /* Now copy the bitmaps */ ++ if (bitmaps && ret == 0) { ++ ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs); ++ } ++ + out: + if (!ret) { + qemu_progress_print(100, 0); +diff --git a/qemu-img.texi b/qemu-img.texi +index 3670b96..b95d019 100644 +--- a/qemu-img.texi ++++ b/qemu-img.texi +@@ -161,6 +161,8 @@ Parameters to convert subcommand: + + @table @option + ++@item --bitmaps ++Additionally copy all persistent bitmaps from the top layer of the source + @item -n + Skip the creation of the target volume + @item -m +@@ -357,7 +359,7 @@ Error on reading data + + @end table + +-@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ++@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} + + Convert the disk image @var{filename} or a snapshot @var{snapshot_param} + to disk image @var{output_filename} using format @var{output_fmt}. 
It can be optionally compressed (@code{-c} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch b/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch new file mode 100644 index 0000000..c4012b7 --- /dev/null +++ b/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch @@ -0,0 +1,89 @@ +From 58816c3709e5058e8805333ca011cc4e793d67ff Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:18 +0100 +Subject: [PATCH 13/26] qemu-img: Factor out code for merging bitmaps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-11-eblake@redhat.com> +Patchwork-id: 97078 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 10/12] qemu-img: Factor out code for merging bitmaps +Bugzilla: 1779893 1779904 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +The next patch will add another client that wants to merge dirty +bitmaps; it will be easier to refactor the code to construct the QAPI +struct correctly into a helper function. + +Signed-off-by: Eric Blake +Message-Id: <20200521192137.1120211-4-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 6c729dd832207d7347ecb074912f538e2942f269) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-img.c | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index b57856e..39e1586 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -1582,6 +1582,24 @@ out4: + return ret; + } + ++/* Convenience wrapper around qmp_block_dirty_bitmap_merge */ ++static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name, ++ const char *src_node, const char *src_name, ++ Error **errp) ++{ ++ BlockDirtyBitmapMergeSource *merge_src; ++ BlockDirtyBitmapMergeSourceList *list; ++ ++ merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); ++ merge_src->type = QTYPE_QDICT; ++ merge_src->u.external.node = g_strdup(src_node); ++ merge_src->u.external.name = g_strdup(src_name); ++ list = g_new0(BlockDirtyBitmapMergeSourceList, 1); ++ list->value = merge_src; ++ qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp); ++ qapi_free_BlockDirtyBitmapMergeSourceList(list); ++} ++ + enum ImgConvertBlockStatus { + BLK_DATA, + BLK_ZERO, +@@ -4614,21 +4632,11 @@ static int img_bitmap(int argc, char **argv) + qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err); + op = "disable"; + break; +- case BITMAP_MERGE: { +- BlockDirtyBitmapMergeSource *merge_src; +- BlockDirtyBitmapMergeSourceList *list; +- +- merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); +- merge_src->type = QTYPE_QDICT; +- merge_src->u.external.node = g_strdup(src_bs->node_name); +- merge_src->u.external.name = g_strdup(act->src); +- list = g_new0(BlockDirtyBitmapMergeSourceList, 1); +- list->value = merge_src; +- qmp_block_dirty_bitmap_merge(bs->node_name, bitmap, list, &err); +- qapi_free_BlockDirtyBitmapMergeSourceList(list); ++ case BITMAP_MERGE: ++ do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name, ++ act->src, &err); + op = "merge"; + break; +- } + default: + g_assert_not_reached(); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch 
b/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch new file mode 100644 index 0000000..b4180b9 --- /dev/null +++ b/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch @@ -0,0 +1,241 @@ +From 1a8a4ece5def912e7cfa5ef8565fc8ecef6e72c3 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 Jun 2020 02:34:11 +0100 +Subject: [PATCH 06/26] qemu_img: add cvtnum_full to print error reports + +RH-Author: Eric Blake +Message-id: <20200602023420.2133649-4-eblake@redhat.com> +Patchwork-id: 97067 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 03/12] qemu_img: add cvtnum_full to print error reports +Bugzilla: 1779893 1779904 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +From: Eyal Moscovici + +All calls to cvtnum check the return value and print the same error +message more or less. And so error reporting moved to cvtnum_full to +reduce code duplication and provide a single error +message. Additionally, cvtnum now wraps cvtnum_full with the existing +default range of 0 to MAX_INT64. + +Acked-by: Mark Kanda +Signed-off-by: Eyal Moscovici +Message-Id: <20200513133629.18508-2-eyal.moscovici@oracle.com> +Reviewed-by: Eric Blake +[eblake: fix printf formatting, avoid trailing space, change error wording, +reformat commit message] +Signed-off-by: Eric Blake +(cherry picked from commit 43d589b074370ebc9b340340b5f641b385da9df8) +Signed-off-by: Eric Blake + +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-img.c | 76 +++++++++++++++++++++------------------------- + tests/qemu-iotests/049.out | 8 ++--- + 2 files changed, 38 insertions(+), 46 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 95a24b9..e69529b 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -422,19 +422,31 @@ static int add_old_style_options(const char *fmt, QemuOpts *opts, + return 0; + } + +-static int64_t cvtnum(const char *s) ++static int64_t cvtnum_full(const char *name, const char *value, int64_t min, ++ int64_t max) + { + int err; +- uint64_t value; +- +- err = qemu_strtosz(s, NULL, &value); +- if (err < 0) { ++ uint64_t res; ++ ++ err = qemu_strtosz(value, NULL, &res); ++ if (err < 0 && err != -ERANGE) { ++ error_report("Invalid %s specified. You may use " ++ "k, M, G, T, P or E suffixes for", name); ++ error_report("kilobytes, megabytes, gigabytes, terabytes, " ++ "petabytes and exabytes."); + return err; + } +- if (value > INT64_MAX) { ++ if (err == -ERANGE || res > max || res < min) { ++ error_report("Invalid %s specified. Must be between %" PRId64 ++ " and %" PRId64 ".", name, min, max); + return -ERANGE; + } +- return value; ++ return res; ++} ++ ++static int64_t cvtnum(const char *name, const char *value) ++{ ++ return cvtnum_full(name, value, 0, INT64_MAX); + } + + static int img_create(int argc, char **argv) +@@ -532,16 +544,8 @@ static int img_create(int argc, char **argv) + if (optind < argc) { + int64_t sval; + +- sval = cvtnum(argv[optind++]); ++ sval = cvtnum("image size", argv[optind++]); + if (sval < 0) { +- if (sval == -ERANGE) { +- error_report("Image size must be less than 8 EiB!"); +- } else { +- error_report("Invalid image size specified! 
You may use k, M, " +- "G, T, P or E suffixes for "); +- error_report("kilobytes, megabytes, gigabytes, terabytes, " +- "petabytes and exabytes."); +- } + goto fail; + } + img_size = (uint64_t)sval; +@@ -2148,8 +2152,10 @@ static int img_convert(int argc, char **argv) + { + int64_t sval; + +- sval = cvtnum(optarg); +- if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || ++ sval = cvtnum("buffer size for sparse output", optarg); ++ if (sval < 0) { ++ goto fail_getopt; ++ } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || + sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) { + error_report("Invalid buffer size for sparse output specified. " + "Valid sizes are multiples of %llu up to %llu. Select " +@@ -4229,9 +4235,8 @@ static int img_bench(int argc, char **argv) + break; + case 'o': + { +- offset = cvtnum(optarg); ++ offset = cvtnum("offset", optarg); + if (offset < 0) { +- error_report("Invalid offset specified"); + return 1; + } + break; +@@ -4244,9 +4249,8 @@ static int img_bench(int argc, char **argv) + { + int64_t sval; + +- sval = cvtnum(optarg); +- if (sval < 0 || sval > INT_MAX) { +- error_report("Invalid buffer size specified"); ++ sval = cvtnum_full("buffer size", optarg, 0, INT_MAX); ++ if (sval < 0) { + return 1; + } + +@@ -4257,9 +4261,8 @@ static int img_bench(int argc, char **argv) + { + int64_t sval; + +- sval = cvtnum(optarg); +- if (sval < 0 || sval > INT_MAX) { +- error_report("Invalid step size specified"); ++ sval = cvtnum_full("step_size", optarg, 0, INT_MAX); ++ if (sval < 0) { + return 1; + } + +@@ -4429,10 +4432,9 @@ static int img_dd_bs(const char *arg, + { + int64_t res; + +- res = cvtnum(arg); ++ res = cvtnum_full("bs", arg, 1, INT_MAX); + +- if (res <= 0 || res > INT_MAX) { +- error_report("invalid number: '%s'", arg); ++ if (res < 0) { + return 1; + } + in->bsz = out->bsz = res; +@@ -4444,10 +4446,9 @@ static int img_dd_count(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) + { +- dd->count = cvtnum(arg); 
++ dd->count = cvtnum("count", arg); + + if (dd->count < 0) { +- error_report("invalid number: '%s'", arg); + return 1; + } + +@@ -4476,10 +4477,9 @@ static int img_dd_skip(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) + { +- in->offset = cvtnum(arg); ++ in->offset = cvtnum("skip", arg); + + if (in->offset < 0) { +- error_report("invalid number: '%s'", arg); + return 1; + } + +@@ -4869,16 +4869,8 @@ static int img_measure(int argc, char **argv) + { + int64_t sval; + +- sval = cvtnum(optarg); ++ sval = cvtnum("image size", optarg); + if (sval < 0) { +- if (sval == -ERANGE) { +- error_report("Image size must be less than 8 EiB!"); +- } else { +- error_report("Invalid image size specified! You may use " +- "k, M, G, T, P or E suffixes for "); +- error_report("kilobytes, megabytes, gigabytes, terabytes, " +- "petabytes and exabytes."); +- } + goto out; + } + img_size = (uint64_t)sval; +diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out +index 6b50540..8b35f3d 100644 +--- a/tests/qemu-iotests/049.out ++++ b/tests/qemu-iotests/049.out +@@ -92,19 +92,19 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 cluster_size=65536 l + == 3. Invalid sizes == + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 +-qemu-img: Image size must be less than 8 EiB! ++qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. + + qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 + qemu-img: TEST_DIR/t.qcow2: Value '-1024' is out of range for parameter 'size' + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k +-qemu-img: Image size must be less than 8 EiB! ++qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. + + qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 + qemu-img: TEST_DIR/t.qcow2: Value '-1k' is out of range for parameter 'size' + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte +-qemu-img: Invalid image size specified! 
You may use k, M, G, T, P or E suffixes for ++qemu-img: Invalid image size specified. You may use k, M, G, T, P or E suffixes for + qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. + + qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 +@@ -113,7 +113,7 @@ Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- + and exabytes, respectively. + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar +-qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for ++qemu-img: Invalid image size specified. You may use k, M, G, T, P or E suffixes for + qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. + + qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qga-add-command-guest-get-disks.patch b/SOURCES/kvm-qga-add-command-guest-get-disks.patch new file mode 100644 index 0000000..89218f9 --- /dev/null +++ b/SOURCES/kvm-qga-add-command-guest-get-disks.patch @@ -0,0 +1,117 @@ +From 9ed672656f15a47bdc0f9af0f96e55132ad5c0cf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:57 -0500 +Subject: [PATCH 06/10] qga: add command guest-get-disks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-7-marcandre.lureau@redhat.com> +Patchwork-id: 100525 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 06/10] qga: add command guest-get-disks +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Tomáš Golembiovský + +Add API and stubs for new guest-get-disks command. + +The command guest-get-fsinfo can be used to list information about disks +and partitions but it is limited only to mounted disks with filesystem. 
+This new command should allow listing information about disks of the VM +regardless of whether they are mounted or not. This can be useful for +management applications for mapping virtualized devices or pass-through +devices to device names in the guest OS. + +Signed-off-by: Tomáš Golembiovský +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Marc-André Lureau +Signed-off-by: Michael Roth + +(cherry-picked from commit c27ea3f9ef7c7f29e55bde91879f8514abce9c38) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-posix.c | 6 ++++++ + qga/commands-win32.c | 6 ++++++ + qga/qapi-schema.json | 31 +++++++++++++++++++++++++++++++ + 3 files changed, 43 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 1c1a165daed..9b690f3cceb 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -2978,3 +2978,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index 55ba5b263af..be63fa2b208 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -2234,3 +2234,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} +diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json +index fb4605cc19c..22df375c92f 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -852,6 +852,37 @@ + 'bus': 'int', 'target': 'int', 'unit': 'int', + '*serial': 'str', '*dev': 'str'} } + ++## ++# @GuestDiskInfo: ++# ++# @name: device node (Linux) or device UNC (Windows) ++# @partition: whether this is a partition or disk ++# @dependents: list of dependent devices; e.g.
for LVs of the LVM this will ++# hold the list of PVs, for LUKS encrypted volume this will ++# contain the disk where the volume is placed. (Linux) ++# @address: disk address information (only for non-virtual devices) ++# @alias: optional alias assigned to the disk, on Linux this is a name assigned ++# by device mapper ++# ++# Since 5.2 ++## ++{ 'struct': 'GuestDiskInfo', ++ 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], ++ '*address': 'GuestDiskAddress', '*alias': 'str'} } ++ ++## ++# @guest-get-disks: ++# ++# Returns: The list of disks in the guest. For Windows these are only the ++# physical disks. On Linux these are all root block devices of ++# non-zero size including e.g. removable devices, loop devices, ++# NBD, etc. ++# ++# Since: 5.2 ++## ++{ 'command': 'guest-get-disks', ++ 'returns': ['GuestDiskInfo'] } ++ + ## + # @GuestFilesystemInfo: + # +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch new file mode 100644 index 0000000..2b3cee5 --- /dev/null +++ b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch @@ -0,0 +1,431 @@ +From 7be40868e976b12107a0111dcd5ee5fe77e9df61 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:58 -0500 +Subject: [PATCH 07/10] qga: add implementation of guest-get-disks for Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-8-marcandre.lureau@redhat.com> +Patchwork-id: 100526 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 07/10] qga: add implementation of guest-get-disks for Linux +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Tomáš Golembiovský + +The command lists all disks (real and virtual) as well as disk 
+partitions. For each disk the list of dependent disks is also listed and +/dev path is used as a handle so it can be matched with "name" field of +other returned disk entries. For disk partitions the "dependents" list +is populated with the parent device for easier tracking of +hierarchy. + +Example output: +{ + "return": [ + ... + { + "name": "/dev/dm-0", + "partition": false, + "dependents": [ + "/dev/sda2" + ], + "alias": "luks-7062202e-5b9b-433e-81e8-6628c40da9f7" + }, + { + "name": "/dev/sda2", + "partition": true, + "dependents": [ + "/dev/sda" + ] + }, + { + "name": "/dev/sda", + "partition": false, + "address": { + "serial": "SAMSUNG_MZ7LN512HCHP-000L1_S1ZKNXAG822493", + "bus-type": "sata", + ... + "dev": "/dev/sda", + "target": 0 + }, + "dependents": [] + }, + ... + ] +} + +Signed-off-by: Tomáš Golembiovský +Reviewed-by: Marc-André Lureau +*add missing stub for !defined(CONFIG_FSFREEZE) +*remove unused deps_dir variable +Signed-off-by: Michael Roth + +(cherry picked from commit fed3956429d560a06fc2d2fcf1a01efb58659f87) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L.
de Paula +--- + qga/commands-posix.c | 303 +++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 292 insertions(+), 11 deletions(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 9b690f3cceb..af80bb7ccbb 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1091,13 +1091,27 @@ static void build_guest_fsinfo_for_virtual_device(char const *syspath, + closedir(dir); + } + ++static bool is_disk_virtual(const char *devpath, Error **errp) ++{ ++ g_autofree char *syspath = realpath(devpath, NULL); ++ ++ if (!syspath) { ++ error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); ++ return false; ++ } ++ return strstr(syspath, "/devices/virtual/block/") != NULL; ++} ++ + /* Dispatch to functions for virtual/real device */ + static void build_guest_fsinfo_for_device(char const *devpath, + GuestFilesystemInfo *fs, + Error **errp) + { +- char *syspath = realpath(devpath, NULL); ++ ERRP_GUARD(); ++ g_autofree char *syspath = NULL; ++ bool is_virtual = false; + ++ syspath = realpath(devpath, NULL); + if (!syspath) { + error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); + return; +@@ -1108,16 +1122,281 @@ static void build_guest_fsinfo_for_device(char const *devpath, + } + + g_debug(" parse sysfs path '%s'", syspath); +- +- if (strstr(syspath, "/devices/virtual/block/")) { ++ is_virtual = is_disk_virtual(syspath, errp); ++ if (*errp != NULL) { ++ return; ++ } ++ if (is_virtual) { + build_guest_fsinfo_for_virtual_device(syspath, fs, errp); + } else { + build_guest_fsinfo_for_real_device(syspath, fs, errp); + } ++} ++ ++#ifdef CONFIG_LIBUDEV ++ ++/* ++ * Wrapper around build_guest_fsinfo_for_device() for getting just ++ * the disk address. 
++ */ ++static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp) ++{ ++ g_autoptr(GuestFilesystemInfo) fs = NULL; + +- free(syspath); ++ fs = g_new0(GuestFilesystemInfo, 1); ++ build_guest_fsinfo_for_device(syspath, fs, errp); ++ if (fs->disk != NULL) { ++ return g_steal_pointer(&fs->disk->value); ++ } ++ return NULL; + } + ++static char *get_alias_for_syspath(const char *syspath) ++{ ++ struct udev *udev = NULL; ++ struct udev_device *udevice = NULL; ++ char *ret = NULL; ++ ++ udev = udev_new(); ++ if (udev == NULL) { ++ g_debug("failed to query udev"); ++ goto out; ++ } ++ udevice = udev_device_new_from_syspath(udev, syspath); ++ if (udevice == NULL) { ++ g_debug("failed to query udev for path: %s", syspath); ++ goto out; ++ } else { ++ const char *alias = udev_device_get_property_value( ++ udevice, "DM_NAME"); ++ /* ++ * NULL means there was an error and empty string means there is no ++ * alias. In case of no alias we return NULL instead of empty string. ++ */ ++ if (alias == NULL) { ++ g_debug("failed to query udev for device alias for: %s", ++ syspath); ++ } else if (*alias != 0) { ++ ret = g_strdup(alias); ++ } ++ } ++ ++out: ++ udev_unref(udev); ++ udev_device_unref(udevice); ++ return ret; ++} ++ ++static char *get_device_for_syspath(const char *syspath) ++{ ++ struct udev *udev = NULL; ++ struct udev_device *udevice = NULL; ++ char *ret = NULL; ++ ++ udev = udev_new(); ++ if (udev == NULL) { ++ g_debug("failed to query udev"); ++ goto out; ++ } ++ udevice = udev_device_new_from_syspath(udev, syspath); ++ if (udevice == NULL) { ++ g_debug("failed to query udev for path: %s", syspath); ++ goto out; ++ } else { ++ ret = g_strdup(udev_device_get_devnode(udevice)); ++ } ++ ++out: ++ udev_unref(udev); ++ udev_device_unref(udevice); ++ return ret; ++} ++ ++static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) ++{ ++ g_autofree char *deps_dir = NULL; ++ const gchar *dep; ++ GDir *dp_deps = NULL; ++ ++ /* List dependent disks */ 
++ deps_dir = g_strdup_printf("%s/slaves", disk_dir); ++ g_debug(" listing entries in: %s", deps_dir); ++ dp_deps = g_dir_open(deps_dir, 0, NULL); ++ if (dp_deps == NULL) { ++ g_debug("failed to list entries in %s", deps_dir); ++ return; ++ } ++ while ((dep = g_dir_read_name(dp_deps)) != NULL) { ++ g_autofree char *dep_dir = NULL; ++ strList *dep_item = NULL; ++ char *dev_name; ++ ++ /* Add dependent disks */ ++ dep_dir = g_strdup_printf("%s/%s", deps_dir, dep); ++ dev_name = get_device_for_syspath(dep_dir); ++ if (dev_name != NULL) { ++ g_debug(" adding dependent device: %s", dev_name); ++ dep_item = g_new0(strList, 1); ++ dep_item->value = dev_name; ++ dep_item->next = disk->dependents; ++ disk->dependents = dep_item; ++ } ++ } ++ g_dir_close(dp_deps); ++} ++ ++/* ++ * Detect partitions subdirectory, name is "" or ++ * "p" ++ * ++ * @disk_name -- last component of /sys path (e.g. sda) ++ * @disk_dir -- sys path of the disk (e.g. /sys/block/sda) ++ * @disk_dev -- device node of the disk (e.g. 
/dev/sda) ++ */ ++static GuestDiskInfoList *get_disk_partitions( ++ GuestDiskInfoList *list, ++ const char *disk_name, const char *disk_dir, ++ const char *disk_dev) ++{ ++ GuestDiskInfoList *item, *ret = list; ++ struct dirent *de_disk; ++ DIR *dp_disk = NULL; ++ size_t len = strlen(disk_name); ++ ++ dp_disk = opendir(disk_dir); ++ while ((de_disk = readdir(dp_disk)) != NULL) { ++ g_autofree char *partition_dir = NULL; ++ char *dev_name; ++ GuestDiskInfo *partition; ++ ++ if (!(de_disk->d_type & DT_DIR)) { ++ continue; ++ } ++ ++ if (!(strncmp(disk_name, de_disk->d_name, len) == 0 && ++ ((*(de_disk->d_name + len) == 'p' && ++ isdigit(*(de_disk->d_name + len + 1))) || ++ isdigit(*(de_disk->d_name + len))))) { ++ continue; ++ } ++ ++ partition_dir = g_strdup_printf("%s/%s", ++ disk_dir, de_disk->d_name); ++ dev_name = get_device_for_syspath(partition_dir); ++ if (dev_name == NULL) { ++ g_debug("Failed to get device name for syspath: %s", ++ disk_dir); ++ continue; ++ } ++ partition = g_new0(GuestDiskInfo, 1); ++ partition->name = dev_name; ++ partition->partition = true; ++ /* Add parent disk as dependent for easier tracking of hierarchy */ ++ partition->dependents = g_new0(strList, 1); ++ partition->dependents->value = g_strdup(disk_dev); ++ ++ item = g_new0(GuestDiskInfoList, 1); ++ item->value = partition; ++ item->next = ret; ++ ret = item; ++ ++ } ++ closedir(dp_disk); ++ ++ return ret; ++} ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ GuestDiskInfoList *item, *ret = NULL; ++ GuestDiskInfo *disk; ++ DIR *dp = NULL; ++ struct dirent *de = NULL; ++ ++ g_debug("listing /sys/block directory"); ++ dp = opendir("/sys/block"); ++ if (dp == NULL) { ++ error_setg_errno(errp, errno, "Can't open directory \"/sys/block\""); ++ return NULL; ++ } ++ while ((de = readdir(dp)) != NULL) { ++ g_autofree char *disk_dir = NULL, *line = NULL, ++ *size_path = NULL; ++ char *dev_name; ++ Error *local_err = NULL; ++ if (de->d_type != DT_LNK) { ++ g_debug(" skipping 
entry: %s", de->d_name); ++ continue; ++ } ++ ++ /* Check size and skip zero-sized disks */ ++ g_debug(" checking disk size"); ++ size_path = g_strdup_printf("/sys/block/%s/size", de->d_name); ++ if (!g_file_get_contents(size_path, &line, NULL, NULL)) { ++ g_debug(" failed to read disk size"); ++ continue; ++ } ++ if (g_strcmp0(line, "0\n") == 0) { ++ g_debug(" skipping zero-sized disk"); ++ continue; ++ } ++ ++ g_debug(" adding %s", de->d_name); ++ disk_dir = g_strdup_printf("/sys/block/%s", de->d_name); ++ dev_name = get_device_for_syspath(disk_dir); ++ if (dev_name == NULL) { ++ g_debug("Failed to get device name for syspath: %s", ++ disk_dir); ++ continue; ++ } ++ disk = g_new0(GuestDiskInfo, 1); ++ disk->name = dev_name; ++ disk->partition = false; ++ disk->alias = get_alias_for_syspath(disk_dir); ++ disk->has_alias = (disk->alias != NULL); ++ item = g_new0(GuestDiskInfoList, 1); ++ item->value = disk; ++ item->next = ret; ++ ret = item; ++ ++ /* Get address for non-virtual devices */ ++ bool is_virtual = is_disk_virtual(disk_dir, &local_err); ++ if (local_err != NULL) { ++ g_debug(" failed to check disk path, ignoring error: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ local_err = NULL; ++ /* Don't try to get the address */ ++ is_virtual = true; ++ } ++ if (!is_virtual) { ++ disk->address = get_disk_address(disk_dir, &local_err); ++ if (local_err != NULL) { ++ g_debug(" failed to get device info, ignoring error: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ local_err = NULL; ++ } else if (disk->address != NULL) { ++ disk->has_address = true; ++ } ++ } ++ ++ get_disk_deps(disk_dir, disk); ++ ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); ++ } ++ return ret; ++} ++ ++#else ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ ++#endif ++ + /* Return a list of the disk device(s)' info which @mount lies on */ + static 
GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount, + Error **errp) +@@ -2709,6 +2988,13 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) + + return 0; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ + #endif /* CONFIG_FSFREEZE */ + + #if !defined(CONFIG_FSTRIM) +@@ -2745,7 +3031,8 @@ GList *ga_command_blacklist_init(GList *blacklist) + const char *list[] = { + "guest-get-fsinfo", "guest-fsfreeze-status", + "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list", +- "guest-fsfreeze-thaw", "guest-get-fsinfo", NULL}; ++ "guest-fsfreeze-thaw", "guest-get-fsinfo", ++ "guest-get-disks", NULL}; + char **p = (char **)list; + + while (*p) { +@@ -2978,9 +3265,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } +- +-GuestDiskInfoList *qmp_guest_get_disks(Error **errp) +-{ +- error_setg(errp, QERR_UNSUPPORTED); +- return NULL; +-} +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch new file mode 100644 index 0000000..37e8ad6 --- /dev/null +++ b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch @@ -0,0 +1,182 @@ +From 5c5fbed35d6c8b44533ff8a29afb9a85131ea7e9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:40:59 -0500 +Subject: [PATCH 08/10] qga: add implementation of guest-get-disks for Windows +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-9-marcandre.lureau@redhat.com> +Patchwork-id: 100527 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 08/10] qga: add implementation of guest-get-disks for Windows +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Tomáš Golembiovský + +The 
command lists all the physical disk drives. Unlike for Linux +partitions and virtual volumes are not listed. + +Example output: + +{ + "return": [ + { + "name": "\\\\.\\PhysicalDrive0", + "partition": false, + "address": { + "serial": "QM00001", + "bus-type": "sata", + ... + }, + "dependents": [] + } + ] +} + +Signed-off-by: Tomáš Golembiovský +Signed-off-by: Michael Roth + +(cherry picked from commit c67d2efd9d1771fd886e3b58771adaa62897f3d9) +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-win32.c | 107 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 101 insertions(+), 6 deletions(-) + +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index be63fa2b208..a07725e874b 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -960,6 +960,101 @@ out: + return list; + } + ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ ERRP_GUARD(); ++ GuestDiskInfoList *new = NULL, *ret = NULL; ++ HDEVINFO dev_info; ++ SP_DEVICE_INTERFACE_DATA dev_iface_data; ++ int i; ++ ++ dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_DISK, 0, 0, ++ DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); ++ if (dev_info == INVALID_HANDLE_VALUE) { ++ error_setg_win32(errp, GetLastError(), "failed to get device tree"); ++ return NULL; ++ } ++ ++ g_debug("enumerating devices"); ++ dev_iface_data.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); ++ for (i = 0; ++ SetupDiEnumDeviceInterfaces(dev_info, NULL, &GUID_DEVINTERFACE_DISK, ++ i, &dev_iface_data); ++ i++) { ++ GuestDiskAddress *address = NULL; ++ GuestDiskInfo *disk = NULL; ++ Error *local_err = NULL; ++ g_autofree PSP_DEVICE_INTERFACE_DETAIL_DATA ++ pdev_iface_detail_data = NULL; ++ STORAGE_DEVICE_NUMBER sdn; ++ HANDLE dev_file; ++ DWORD size = 0; ++ BOOL result; ++ int attempt; ++ ++ g_debug(" getting device path"); ++ for (attempt = 0, result = FALSE; attempt < 2 && !result; attempt++) { ++ result = SetupDiGetDeviceInterfaceDetail(dev_info, ++ &dev_iface_data, pdev_iface_detail_data, size, &size, NULL); ++ if 
(result) { ++ break; ++ } ++ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { ++ pdev_iface_detail_data = g_realloc(pdev_iface_detail_data, ++ size); ++ pdev_iface_detail_data->cbSize = ++ sizeof(*pdev_iface_detail_data); ++ } else { ++ g_debug("failed to get device interface details"); ++ break; ++ } ++ } ++ if (!result) { ++ g_debug("skipping device"); ++ continue; ++ } ++ ++ g_debug(" device: %s", pdev_iface_detail_data->DevicePath); ++ dev_file = CreateFile(pdev_iface_detail_data->DevicePath, 0, ++ FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); ++ if (!DeviceIoControl(dev_file, IOCTL_STORAGE_GET_DEVICE_NUMBER, ++ NULL, 0, &sdn, sizeof(sdn), &size, NULL)) { ++ CloseHandle(dev_file); ++ debug_error("failed to get storage device number"); ++ continue; ++ } ++ CloseHandle(dev_file); ++ ++ disk = g_new0(GuestDiskInfo, 1); ++ disk->name = g_strdup_printf("\\\\.\\PhysicalDrive%lu", ++ sdn.DeviceNumber); ++ ++ g_debug(" number: %lu", sdn.DeviceNumber); ++ address = g_malloc0(sizeof(GuestDiskAddress)); ++ address->has_dev = true; ++ address->dev = g_strdup(disk->name); ++ get_single_disk_info(sdn.DeviceNumber, address, &local_err); ++ if (local_err) { ++ g_debug("failed to get disk info: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ qapi_free_GuestDiskAddress(address); ++ address = NULL; ++ } else { ++ disk->address = address; ++ disk->has_address = true; ++ } ++ ++ new = g_malloc0(sizeof(GuestDiskInfoList)); ++ new->value = disk; ++ new->next = ret; ++ ret = new; ++ } ++ ++ SetupDiDestroyDeviceInfoList(dev_info); ++ return ret; ++} ++ + #else + + static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) +@@ -967,6 +1062,12 @@ static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) + return NULL; + } + ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ + #endif /* CONFIG_QGA_NTDDSCSI */ + + static GuestFilesystemInfo 
*build_guest_fsinfo(char *guid, Error **errp) +@@ -2234,9 +2335,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } +- +-GuestDiskInfoList *qmp_guest_get_disks(Error **errp) +-{ +- error_setg(errp, QERR_UNSUPPORTED); +- return NULL; +-} +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch b/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch new file mode 100644 index 0000000..d16f0c3 --- /dev/null +++ b/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch @@ -0,0 +1,61 @@ +From 1f47578a5af4e7d4e1587d2334a07e867cf819d4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Tue, 24 Nov 2020 08:43:38 -0500 +Subject: [PATCH] qga: fix assert regression on guest-shutdown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201124084338.199348-2-marcandre.lureau@redhat.com> +Patchwork-id: 99877 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 1/1] qga: fix assert regression on guest-shutdown +Bugzilla: 1900578 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Philippe Mathieu-Daudé + +From: Marc-André Lureau + +Since commit 781f2b3d1e ("qga: process_event() simplification"), +send_response() is called unconditionally, but will assert when "rsp" is +NULL. This may happen with QCO_NO_SUCCESS_RESP commands, such as +"guest-shutdown". + +Fixes: 781f2b3d1e5ef389b44016a897fd55e7a780bf35 +Cc: Michael Roth +Reported-by: Christian Ehrhardt +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Christian Ehrhardt +Tested-by: Christian Ehrhardt +Cc: qemu-stable@nongnu.org +Signed-off-by: Michael Roth + +(cherry picked from commit 844bd70b5652f30bbace89499f513e3fbbb6457a) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/qga/main.c b/qga/main.c +index c35c2a21209..12fa463f4cd 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -529,7 +529,11 @@ static int send_response(GAState *s, const QDict *rsp) + QString *payload_qstr, *response_qstr; + GIOStatus status; + +- g_assert(rsp && s->channel); ++ g_assert(s->channel); ++ ++ if (!rsp) { ++ return 0; ++ } + + payload_qstr = qobject_to_json(QOBJECT(rsp)); + if (!payload_qstr) { +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch b/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch new file mode 100644 index 0000000..a01ecc0 --- /dev/null +++ b/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch @@ -0,0 +1,56 @@ +From 12e5342e10c384539c8c26f075ce98ebbe887a05 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:41:00 -0500 +Subject: [PATCH 09/10] qga: fix missing closedir() in qmp_guest_get_disks() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-10-marcandre.lureau@redhat.com> +Patchwork-id: 100528 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 09/10] qga: fix missing closedir() in qmp_guest_get_disks() +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Michael Roth + +We opendir("/sys/block") at the beginning of the function, but we never +close it prior to returning. 
+ +Fixes: Coverity CID 1436130 +Fixes: fed3956429d5 ("qga: add implementation of guest-get-disks for Linux") +Reported-by: Peter Maydell +Cc: Marc-André Lureau +Cc: Tomáš Golembiovský +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth + +(cherry-picked from commit b1b9ab1c04d560f86d8da3dfca4d8b21de75fee6) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-posix.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index af80bb7ccbb..1a9c2cbc3e6 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1384,6 +1384,9 @@ GuestDiskInfoList *qmp_guest_get_disks(Error **errp) + get_disk_deps(disk_dir, disk); + ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); + } ++ ++ closedir(dp); ++ + return ret; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch b/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch new file mode 100644 index 0000000..3ec8b80 --- /dev/null +++ b/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch @@ -0,0 +1,115 @@ +From 0829b1e0a9ca1a8270e138beb0c58b0b1ad67c9a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 8 Jan 2021 07:41:01 -0500 +Subject: [PATCH 10/10] qga: update schema for guest-get-disks 'dependents' + field +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210108074101.290008-11-marcandre.lureau@redhat.com> +Patchwork-id: 100529 +O-Subject: [RHEL-8.3.0.z qemu-kvm PATCH 10/10] qga: update schema for guest-get-disks 'dependents' field +Bugzilla: 1913818 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Marc-André Lureau + +From: Michael Roth + +The recently-added 'guest-get-disks' command returns a list of +GuestDiskInfo entries, which in turn
have a 'dependents' field which +lists devices these entries are dependent upon. Thus, 'dependencies' +is a better name for this field. Address this by renaming the field +accordingly. + +Additionally, 'dependents' is specified as non-optional, even though +it's not implemented for w32. This is misleading, since it gives users +the impression that a particular disk might not have dependencies, +when in reality that information is simply not known to the guest +agent. Address this by making 'dependents' an optional field, and only +marking it as in-use when the facilities to obtain this information are +available to the guest agent. + +Cc: Eric Blake +Cc: Tomáš Golembiovský +Cc: Marc-André Lureau +Reviewed-by: Eric Blake +Reviewed-by: Marc-André Lureau +Signed-off-by: Michael Roth + +(cherry-picked from commit a8aa94b5f8427cc2924d8cdd417c8014db1c86c0) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-posix.c | 10 ++++++---- + qga/qapi-schema.json | 8 ++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 1a9c2cbc3e6..38bc9a229db 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1226,6 +1226,7 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) + g_debug("failed to list entries in %s", deps_dir); + return; + } ++ disk->has_dependencies = true; + while ((dep = g_dir_read_name(dp_deps)) != NULL) { + g_autofree char *dep_dir = NULL; + strList *dep_item = NULL; +@@ -1238,8 +1239,8 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) + g_debug(" adding dependent device: %s", dev_name); + dep_item = g_new0(strList, 1); + dep_item->value = dev_name; +- dep_item->next = disk->dependents; +- disk->dependents = dep_item; ++ dep_item->next = disk->dependencies; ++ disk->dependencies = dep_item; + } + } + g_dir_close(dp_deps); +@@ -1292,8 +1293,9 @@ static GuestDiskInfoList *get_disk_partitions( + 
partition->name = dev_name; + partition->partition = true; + /* Add parent disk as dependent for easier tracking of hierarchy */ +- partition->dependents = g_new0(strList, 1); +- partition->dependents->value = g_strdup(disk_dev); ++ partition->dependencies = g_new0(strList, 1); ++ partition->dependencies->value = g_strdup(disk_dev); ++ partition->has_dependencies = true; + + item = g_new0(GuestDiskInfoList, 1); + item->value = partition; +diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json +index 22df375c92f..4222cb92d34 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -857,9 +857,9 @@ + # + # @name: device node (Linux) or device UNC (Windows) + # @partition: whether this is a partition or disk +-# @dependents: list of dependent devices; e.g. for LVs of the LVM this will +-# hold the list of PVs, for LUKS encrypted volume this will +-# contain the disk where the volume is placed. (Linux) ++# @dependencies: list of device dependencies; e.g. for LVs of the LVM this will ++# hold the list of PVs, for LUKS encrypted volume this will ++# contain the disk where the volume is placed. 
(Linux) + # @address: disk address information (only for non-virtual devices) + # @alias: optional alias assigned to the disk, on Linux this is a name assigned + # by device mapper +@@ -867,7 +867,7 @@ + # Since 5.2 + ## + { 'struct': 'GuestDiskInfo', +- 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], ++ 'data': {'name': 'str', 'partition': 'bool', '*dependencies': ['str'], + '*address': 'GuestDiskAddress', '*alias': 'str'} } + + ## +-- +2.27.0 + diff --git a/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch new file mode 100644 index 0000000..5384b51 --- /dev/null +++ b/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch @@ -0,0 +1,55 @@ +From 5d590d354e42515ea074bf2110a2ab236dbabba1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 8 Jun 2020 15:01:34 +0100 +Subject: [PATCH 06/17] raw-format: Support BDRV_REQ_ZERO_WRITE for truncate + +RH-Author: Kevin Wolf +Message-id: <20200608150140.38218-6-kwolf@redhat.com> +Patchwork-id: 97447 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 05/11] raw-format: Support BDRV_REQ_ZERO_WRITE for truncate +Bugzilla: 1780574 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +The raw format driver can simply forward the flag and let its bs->file +child take care of actually providing the zeros. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200424125448.63318-6-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1ddaabaecb7eaeb6d8948a32340af95db44c54a1) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/raw-format.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/raw-format.c b/block/raw-format.c +index c3acf9a..bdec466 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + + s->size = offset; + offset += s->offset; +- return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); ++ return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); + } + + static void raw_eject(BlockDriverState *bs, bool eject_flag) +@@ -445,6 +445,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); ++ bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & ++ BDRV_REQ_ZERO_WRITE; + + if (bs->probed && !bdrv_is_read_only(bs)) { + bdrv_refresh_filename(bs->file->bs); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch new file mode 100644 index 0000000..09ef4de --- /dev/null +++ b/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch @@ -0,0 +1,57 @@ +From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:13 +0100 +Subject: [PATCH 2/6] replication: assert we own context before job_cancel_sync + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-3-kwolf@redhat.com> +Patchwork-id: 94595 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +job_cancel_sync requires the job's lock to be held, all other callers +already do this (replication_stop, 
drive_backup_abort, +blockdev_backup_abort, job_cancel_sync_all, cancel_common). + +In this case we're in a BlockDriver handler, so we already have a lock, +just assert that it is the same as the one used for the commit_job. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/replication.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/replication.c b/block/replication.c +index 99532ce..0ce27ee 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -144,12 +144,15 @@ fail: + static void replication_close(BlockDriverState *bs) + { + BDRVReplicationState *s = bs->opaque; ++ Job *commit_job; + + if (s->stage == BLOCK_REPLICATION_RUNNING) { + replication_stop(s->rs, false, NULL); + } + if (s->stage == BLOCK_REPLICATION_FAILOVER) { +- job_cancel_sync(&s->commit_job->job); ++ commit_job = &s->commit_job->job; ++ assert(commit_job->aio_context == qemu_get_current_aio_context()); ++ job_cancel_sync(commit_job); + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch b/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch new file mode 100644 index 0000000..c45158a --- /dev/null +++ b/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch @@ -0,0 +1,54 @@ +From 1769600e1e3bd5ca48450de8ce8a118bf0af96f3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:00 -0400 +Subject: [PATCH 18/42] s390/ipl: fix off-by-one in + update_machine_ipl_properties() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-19-thuth@redhat.com> +Patchwork-id: 97028 +O-Subject: [RHEL-8.3.0 qemu-kvm 
PATCH v2 18/38] s390/ipl: fix off-by-one in update_machine_ipl_properties() +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Halil Pasic + +In update_machine_ipl_properties() the array ascii_loadparm needs to +hold the 8 char loadparm and a string terminating zero char. + +Let's increase the size of ascii_loadparm accordingly. + +Signed-off-by: Halil Pasic +Fixes: 0a01e082a428 ("s390/ipl: sync back loadparm") +Fixes: Coverity CID 1421966 +Reported-by: Peter Maydell +Message-Id: <20200320143101.41764-1-pasic@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 7722837369eb1c7e808021d79da68afa0c01c26f) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/ipl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index f25339c503..fa0409dc23 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -537,7 +537,7 @@ static void update_machine_ipl_properties(IplParameterBlock *iplb) + /* Sync loadparm */ + if (iplb->flags & DIAG308_FLAGS_LP_VALID) { + uint8_t *ebcdic_loadparm = iplb->loadparm; +- char ascii_loadparm[8]; ++ char ascii_loadparm[9]; + int i; + + for (i = 0; i < 8 && ebcdic_loadparm[i]; i++) { +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch b/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch new file mode 100644 index 0000000..49f4d3f --- /dev/null +++ b/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch @@ -0,0 +1,91 @@ +From 53053ea2e6c757e5d044655c8b61c485e0aad4ed Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:59 -0400 +Subject: [PATCH 17/42] s390/ipl: sync back loadparm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-18-thuth@redhat.com> +Patchwork-id: 97039 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 17/38] s390/ipl: sync back loadparm 
+Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Halil Pasic + +We expose loadparm as a r/w machine property, but if loadparm is set by +the guest via DIAG 308, we don't update the property. Having a +disconnect between the guest view and the QEMU property is not nice in +itself, but things get even worse for SCSI, where under certain +circumstances (see 789b5a401b "s390: Ensure IPL from SCSI works as +expected" for details) we call s390_gen_initial_iplb() on resets +effectively overwriting the guest/user supplied loadparm with the stale +value. + +Signed-off-by: Halil Pasic +Fixes: 7104bae9de ("hw/s390x: provide loadparm property for the machine") +Reported-by: Marc Hartmayer +Reviewed-by: Janosch Frank +Reviewed-by: Viktor Mihajlovski +Tested-by: Marc Hartmayer +Reviewed-by: David Hildenbrand +Message-Id: <20200309133223.100491-1-pasic@linux.ibm.com> +[borntraeger@de.ibm.com: use reverse xmas tree] +Signed-off-by: Christian Borntraeger +(cherry picked from commit 0a01e082a428b921e48b5314881b1f23a7b0fe50) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/ipl.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 0b7548a549..f25339c503 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -529,6 +529,30 @@ static bool is_virtio_scsi_device(IplParameterBlock *iplb) + return is_virtio_ccw_device_of_type(iplb, VIRTIO_ID_SCSI); + } + ++static void update_machine_ipl_properties(IplParameterBlock *iplb) ++{ ++ Object *machine = qdev_get_machine(); ++ Error *err = NULL; ++ ++ /* Sync loadparm */ ++ if (iplb->flags & DIAG308_FLAGS_LP_VALID) { ++ uint8_t *ebcdic_loadparm = iplb->loadparm; ++ char ascii_loadparm[8]; ++ int i; ++ ++ for (i = 0; i < 8 && ebcdic_loadparm[i]; i++) { ++ ascii_loadparm[i] = ebcdic2ascii[(uint8_t) ebcdic_loadparm[i]]; ++ } ++ ascii_loadparm[i] = 0; ++ object_property_set_str(machine, ascii_loadparm, "loadparm", &err); ++ } else { ++ object_property_set_str(machine, "", "loadparm", &err); ++ } ++ if (err) { ++ warn_report_err(err); ++ } ++} ++ + void s390_ipl_update_diag308(IplParameterBlock *iplb) + { + S390IPLState *ipl = get_ipl_device(); +@@ -536,6 +560,7 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb) + ipl->iplb = *iplb; + ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); ++ update_machine_ipl_properties(iplb); + } + + IplParameterBlock *s390_ipl_get_iplb(void) +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch b/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch new file mode 100644 index 0000000..2040d5c --- /dev/null +++ b/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch @@ -0,0 +1,52 @@ +From cd7da3cf1b19fef0a497fd556562040a85e579a7 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:57 -0400 +Subject: [PATCH 15/42] s390/sclp: improve special wait psw logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: 
<20200529055420.16855-16-thuth@redhat.com> +Patchwork-id: 97037 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 15/38] s390/sclp: improve special wait psw logic +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Christian Borntraeger + +There is a special quiesce PSW that we check for "shutdown". Otherwise disabled +wait is detected as "crashed". Architecturally we must only check PSW bits +116-127. Fix this. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Christian Borntraeger +Message-Id: <1582204582-22995-1-git-send-email-borntraeger@de.ibm.com> +Reviewed-by: David Hildenbrand +Acked-by: Janosch Frank +Signed-off-by: Cornelia Huck +(cherry picked from commit 8b51c0961cc13e55b26bb6665ec3a341abdc7658) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/helper.c b/target/s390x/helper.c +index a3a49164e4..6808dfda01 100644 +--- a/target/s390x/helper.c ++++ b/target/s390x/helper.c +@@ -89,7 +89,7 @@ hwaddr s390_cpu_get_phys_addr_debug(CPUState *cs, vaddr vaddr) + static inline bool is_special_wait_psw(uint64_t psw_addr) + { + /* signal quiesce */ +- return psw_addr == 0xfffUL; ++ return (psw_addr & 0xfffUL) == 0xfffUL; + } + + void s390_handle_wait(S390CPU *cpu) +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch b/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch new file mode 100644 index 0000000..1b566d7 --- /dev/null +++ b/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch @@ -0,0 +1,150 @@ +From ebcd74c2267d69fe09ca03cb8bfed7bef5ea3a85 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:08 -0400 +Subject: [PATCH 26/42] s390x: Add SIDA memory ops + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-27-thuth@redhat.com> +Patchwork-id: 97033 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 26/38] s390x: Add SIDA memory ops +Bugzilla: 1828317 
+RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Protected guests save the instruction control blocks in the SIDA +instead of QEMU/KVM directly accessing the guest's memory. + +Let's introduce new functions to access the SIDA. + +The memops for doing so are available with KVM_CAP_S390_PROTECTED, so +let's check for that. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-8-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 1cca8265499d394d9ed4bfb75bd6e7265b529f89) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.h | 7 ++++++- + target/s390x/kvm.c | 26 ++++++++++++++++++++++++++ + target/s390x/kvm_s390x.h | 2 ++ + target/s390x/mmu_helper.c | 14 ++++++++++++++ + 4 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 1ff84e6b3a..edf8391504 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -828,7 +828,12 @@ int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, + #define s390_cpu_virt_mem_check_write(cpu, laddr, ar, len) \ + s390_cpu_virt_mem_rw(cpu, laddr, ar, NULL, len, true) + void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra); +- ++int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, ++ int len, bool is_write); ++#define s390_cpu_pv_mem_read(cpu, offset, dest, len) \ ++ s390_cpu_pv_mem_rw(cpu, offset, dest, len, false) ++#define s390_cpu_pv_mem_write(cpu, offset, dest, len) \ ++ s390_cpu_pv_mem_rw(cpu, offset, dest, len, true) + + /* sigp.c */ + int s390_cpu_restart(S390CPU *cpu); +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index af50b2c253..f67bb5ce2c 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -154,6 +154,7 @@ static int cap_ri; + static int cap_gs; + static int 
cap_hpage_1m; + static int cap_vcpu_resets; ++static int cap_protected; + + static int active_cmma; + +@@ -351,6 +352,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); ++ cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); + + if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) + || !kvm_check_extension(s, KVM_CAP_S390_COW)) { +@@ -848,6 +850,30 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + return ret; + } + ++int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf, ++ int len, bool is_write) ++{ ++ struct kvm_s390_mem_op mem_op = { ++ .sida_offset = offset, ++ .size = len, ++ .op = is_write ? KVM_S390_MEMOP_SIDA_WRITE ++ : KVM_S390_MEMOP_SIDA_READ, ++ .buf = (uint64_t)hostbuf, ++ }; ++ int ret; ++ ++ if (!cap_mem_op || !cap_protected) { ++ return -ENOSYS; ++ } ++ ++ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); ++ if (ret < 0) { ++ error_report("KVM_S390_MEM_OP failed: %s", strerror(-ret)); ++ abort(); ++ } ++ return ret; ++} ++ + /* + * Legacy layout for s390: + * Older S390 KVM requires the topmost vma of the RAM to be +diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h +index dea813f450..6ab17c81b7 100644 +--- a/target/s390x/kvm_s390x.h ++++ b/target/s390x/kvm_s390x.h +@@ -19,6 +19,8 @@ void kvm_s390_vcpu_interrupt(S390CPU *cpu, struct kvm_s390_irq *irq); + void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code); + int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + int len, bool is_write); ++int kvm_s390_mem_op_pv(S390CPU *cpu, vaddr addr, void *hostbuf, int len, ++ bool is_write); + void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code); + int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); + void 
kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); +diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c +index c9f3f34750..ec8befbdc8 100644 +--- a/target/s390x/mmu_helper.c ++++ b/target/s390x/mmu_helper.c +@@ -474,6 +474,20 @@ static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages, + return 0; + } + ++int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, ++ int len, bool is_write) ++{ ++ int ret; ++ ++ if (kvm_enabled()) { ++ ret = kvm_s390_mem_op_pv(cpu, offset, hostbuf, len, is_write); ++ } else { ++ /* Protected Virtualization is a KVM/Hardware only feature */ ++ g_assert_not_reached(); ++ } ++ return ret; ++} ++ + /** + * s390_cpu_virt_mem_rw: + * @laddr: the logical start address +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch b/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch new file mode 100644 index 0000000..9ce071e --- /dev/null +++ b/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch @@ -0,0 +1,176 @@ +From e11643b5363262e9f809762a1f2bb5c4a8f26c2a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:56 -0400 +Subject: [PATCH 14/42] s390x: Add missing vcpu reset functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-15-thuth@redhat.com> +Patchwork-id: 97023 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 14/38] s390x: Add missing vcpu reset functions +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Up to now we only had an ioctl to reset vcpu data QEMU couldn't reach +for the initial reset, which was also called for the clear reset. To +be architecture compliant, we also need to clear local interrupts on a +normal reset. 
+ +Because of this and the upcoming protvirt support we need to add +ioctls for the missing clear and normal resets. + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Acked-by: David Hildenbrand +Message-Id: <20200214151636.8764-3-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit b91a03946e0f65ddd22927dd80ca1276bf89c5af) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.c | 14 ++++++++++++-- + target/s390x/kvm-stub.c | 10 +++++++++- + target/s390x/kvm.c | 42 ++++++++++++++++++++++++++++++++-------- + target/s390x/kvm_s390x.h | 4 +++- + 4 files changed, 58 insertions(+), 12 deletions(-) + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index e538a4a3e2..c0dd502b84 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -144,8 +144,18 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + } + + /* Reset state inside the kernel that we cannot access yet from QEMU. */ +- if (kvm_enabled() && type != S390_CPU_RESET_NORMAL) { +- kvm_s390_reset_vcpu(cpu); ++ if (kvm_enabled()) { ++ switch (type) { ++ case S390_CPU_RESET_CLEAR: ++ kvm_s390_reset_vcpu_clear(cpu); ++ break; ++ case S390_CPU_RESET_INITIAL: ++ kvm_s390_reset_vcpu_initial(cpu); ++ break; ++ case S390_CPU_RESET_NORMAL: ++ kvm_s390_reset_vcpu_normal(cpu); ++ break; ++ } + } + } + +diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c +index 5152e2bdf1..c4cd497f85 100644 +--- a/target/s390x/kvm-stub.c ++++ b/target/s390x/kvm-stub.c +@@ -83,7 +83,15 @@ void kvm_s390_cmma_reset(void) + { + } + +-void kvm_s390_reset_vcpu(S390CPU *cpu) ++void kvm_s390_reset_vcpu_initial(S390CPU *cpu) ++{ ++} ++ ++void kvm_s390_reset_vcpu_clear(S390CPU *cpu) ++{ ++} ++ ++void kvm_s390_reset_vcpu_normal(S390CPU *cpu) + { + } + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 1c5bc7a2f9..75d82af6fc 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -151,6 +151,7 @@ static int cap_s390_irq; + static int cap_ri; + static int 
cap_gs; + static int cap_hpage_1m; ++static int cap_vcpu_resets; + + static int active_cmma; + +@@ -342,6 +343,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); ++ cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + + if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) + || !kvm_check_extension(s, KVM_CAP_S390_COW)) { +@@ -403,17 +405,41 @@ int kvm_arch_destroy_vcpu(CPUState *cs) + return 0; + } + +-void kvm_s390_reset_vcpu(S390CPU *cpu) ++static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned long type) + { + CPUState *cs = CPU(cpu); + +- /* The initial reset call is needed here to reset in-kernel +- * vcpu data that we can't access directly from QEMU +- * (i.e. with older kernels which don't support sync_regs/ONE_REG). +- * Before this ioctl cpu_synchronize_state() is called in common kvm +- * code (kvm-all) */ +- if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { +- error_report("Initial CPU reset failed on CPU %i", cs->cpu_index); ++ /* ++ * The reset call is needed here to reset in-kernel vcpu data that ++ * we can't access directly from QEMU (i.e. with older kernels ++ * which don't support sync_regs/ONE_REG). Before this ioctl ++ * cpu_synchronize_state() is called in common kvm code ++ * (kvm-all). 
++ */ ++ if (kvm_vcpu_ioctl(cs, type)) { ++ error_report("CPU reset failed on CPU %i type %lx", ++ cs->cpu_index, type); ++ } ++} ++ ++void kvm_s390_reset_vcpu_initial(S390CPU *cpu) ++{ ++ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET); ++} ++ ++void kvm_s390_reset_vcpu_clear(S390CPU *cpu) ++{ ++ if (cap_vcpu_resets) { ++ kvm_s390_reset_vcpu(cpu, KVM_S390_CLEAR_RESET); ++ } else { ++ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET); ++ } ++} ++ ++void kvm_s390_reset_vcpu_normal(S390CPU *cpu) ++{ ++ if (cap_vcpu_resets) { ++ kvm_s390_reset_vcpu(cpu, KVM_S390_NORMAL_RESET); + } + } + +diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h +index caf985955b..0b21789796 100644 +--- a/target/s390x/kvm_s390x.h ++++ b/target/s390x/kvm_s390x.h +@@ -34,7 +34,9 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, + int vq, bool assign); + int kvm_s390_cmma_active(void); + void kvm_s390_cmma_reset(void); +-void kvm_s390_reset_vcpu(S390CPU *cpu); ++void kvm_s390_reset_vcpu_clear(S390CPU *cpu); ++void kvm_s390_reset_vcpu_normal(S390CPU *cpu); ++void kvm_s390_reset_vcpu_initial(S390CPU *cpu); + int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit); + void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp); + void kvm_s390_crypto_reset(void); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch b/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch new file mode 100644 index 0000000..8ffb7b0 --- /dev/null +++ b/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch @@ -0,0 +1,76 @@ +From ab670456375f0d9b9b2d219fd497d04ec0009e1d Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:16 -0400 +Subject: [PATCH 34/42] s390x: Add unpack facility feature to GA1 + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-35-thuth@redhat.com> +Patchwork-id: 97052 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 34/38] s390x: Add unpack facility feature to GA1 +Bugzilla: 
1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Christian Borntraeger + +The unpack facility is an indication that diagnose 308 subcodes 8-10 +are available to the guest. That means, that the guest can put itself +into protected mode. + +Once it is in protected mode, the hardware stops any attempt of VM +introspection by the hypervisor. + +Some features are currently not supported in protected mode: + * vfio devices + * Migration + * Huge page backings + +Signed-off-by: Christian Borntraeger +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-17-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 572c0826615737f1c095b1b6d9e381ec40f72eb5) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/gen-features.c | 1 + + target/s390x/kvm.c | 8 ++++++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index 6278845b12..8ddeebc544 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -562,6 +562,7 @@ static uint16_t full_GEN15_GA1[] = { + S390_FEAT_GROUP_MSA_EXT_9, + S390_FEAT_GROUP_MSA_EXT_9_PCKMO, + S390_FEAT_ETOKEN, ++ S390_FEAT_UNPACK, + }; + + /* Default features (in order of release) +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 56fe60c49c..84d7cadd09 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2407,6 +2407,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) + clear_bit(S390_FEAT_BPB, model->features); + } + ++ /* ++ * If we have support for protected virtualization, indicate ++ * the protected virtualization IPL unpack facility. 
++ */ ++ if (cap_protected) { ++ set_bit(S390_FEAT_UNPACK, model->features); ++ } ++ + /* We emulate a zPCI bus and AEN, therefore we don't need HW support */ + set_bit(S390_FEAT_ZPCI, model->features); + set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Beautify-diag308-handling.patch b/SOURCES/kvm-s390x-Beautify-diag308-handling.patch new file mode 100644 index 0000000..2ffe6a3 --- /dev/null +++ b/SOURCES/kvm-s390x-Beautify-diag308-handling.patch @@ -0,0 +1,130 @@ +From da81f2b579987ea12929f0ec803716bc16a93df7 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:49 -0400 +Subject: [PATCH 07/42] s390x: Beautify diag308 handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-8-thuth@redhat.com> +Patchwork-id: 97022 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 07/38] s390x: Beautify diag308 handling +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's improve readability by: +* Using constants for the subcodes +* Moving parameter checking into a function +* Removing subcode > 6 check as the default case catches that + +Signed-off-by: Janosch Frank +Reviewed-by: Cornelia Huck +Reviewed-by: Thomas Huth +Reviewed-by: David Hildenbrand +Message-Id: <20191127175046.4911-6-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 0b7fd817e0f383760e37ca9286150d5816cf0594) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/s390x/diag.c | 54 +++++++++++++++++++++++++++------------------ + 1 file changed, 32 insertions(+), 22 deletions(-) + +diff --git a/target/s390x/diag.c b/target/s390x/diag.c +index 53c2f81f2a..b5aec06d6b 100644 +--- a/target/s390x/diag.c ++++ b/target/s390x/diag.c +@@ -53,6 +53,29 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) + #define DIAG_308_RC_NO_CONF 0x0102 + #define DIAG_308_RC_INVALID 0x0402 + ++#define DIAG308_RESET_MOD_CLR 0 ++#define DIAG308_RESET_LOAD_NORM 1 ++#define DIAG308_LOAD_CLEAR 3 ++#define DIAG308_LOAD_NORMAL_DUMP 4 ++#define DIAG308_SET 5 ++#define DIAG308_STORE 6 ++ ++static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, ++ uintptr_t ra, bool write) ++{ ++ if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { ++ s390_program_interrupt(env, PGM_SPECIFICATION, ra); ++ return -1; ++ } ++ if (!address_space_access_valid(&address_space_memory, addr, ++ sizeof(IplParameterBlock), write, ++ MEMTXATTRS_UNSPECIFIED)) { ++ s390_program_interrupt(env, PGM_ADDRESSING, ra); ++ return -1; ++ } ++ return 0; ++} ++ + void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + { + CPUState *cs = env_cpu(env); +@@ -65,30 +88,24 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + return; + } + +- if ((subcode & ~0x0ffffULL) || (subcode > 6)) { ++ if (subcode & ~0x0ffffULL) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } + + switch (subcode) { +- case 0: ++ case DIAG308_RESET_MOD_CLR: + s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); + break; +- case 1: ++ case DIAG308_RESET_LOAD_NORM: + s390_ipl_reset_request(cs, S390_RESET_LOAD_NORMAL); + break; +- case 3: ++ case DIAG308_LOAD_CLEAR: ++ /* Well we still lack the clearing bit... 
*/ + s390_ipl_reset_request(cs, S390_RESET_REIPL); + break; +- case 5: +- if ((r1 & 1) || (addr & 0x0fffULL)) { +- s390_program_interrupt(env, PGM_SPECIFICATION, ra); +- return; +- } +- if (!address_space_access_valid(&address_space_memory, addr, +- sizeof(IplParameterBlock), false, +- MEMTXATTRS_UNSPECIFIED)) { +- s390_program_interrupt(env, PGM_ADDRESSING, ra); ++ case DIAG308_SET: ++ if (diag308_parm_check(env, r1, addr, ra, false)) { + return; + } + iplb = g_new0(IplParameterBlock, 1); +@@ -110,15 +127,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + out: + g_free(iplb); + return; +- case 6: +- if ((r1 & 1) || (addr & 0x0fffULL)) { +- s390_program_interrupt(env, PGM_SPECIFICATION, ra); +- return; +- } +- if (!address_space_access_valid(&address_space_memory, addr, +- sizeof(IplParameterBlock), true, +- MEMTXATTRS_UNSPECIFIED)) { +- s390_program_interrupt(env, PGM_ADDRESSING, ra); ++ case DIAG308_STORE: ++ if (diag308_parm_check(env, r1, addr, ra, true)) { + return; + } + iplb = s390_ipl_get_iplb(); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch b/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch new file mode 100644 index 0000000..dab8acc --- /dev/null +++ b/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch @@ -0,0 +1,52 @@ +From 511638161566d4944a572a31d787eb27bbc0bc8e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:45 -0400 +Subject: [PATCH 03/42] s390x: Don't do a normal reset on the initial cpu +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-4-thuth@redhat.com> +Patchwork-id: 97017 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 03/38] s390x: Don't do a normal reset on the initial cpu +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: 
David Hildenbrand + +From: Janosch Frank + +The initiating cpu needs to be reset with an initial reset. While +doing a normal reset followed by a initial reset is not wrong per se, +the Ultravisor will only allow the correct reset to be performed. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Cornelia Huck +Message-Id: <20191127175046.4911-2-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit ec9227339fce99412830d44a37eb0bd2fadd5f75) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index c2c83d2fce..4ea01c53c0 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -348,6 +348,9 @@ static void s390_machine_reset(MachineState *machine) + break; + case S390_RESET_LOAD_NORMAL: + CPU_FOREACH(t) { ++ if (t == cs) { ++ continue; ++ } + run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); + } + subsystem_reset(); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch b/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch new file mode 100644 index 0000000..9b81586 --- /dev/null +++ b/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch @@ -0,0 +1,101 @@ +From bdad28b11e36f657cb8909e7223a7d8fc0948c2e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:51 -0400 +Subject: [PATCH 09/42] s390x: Fix cpu normal reset ri clearing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-10-thuth@redhat.com> +Patchwork-id: 97029 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 09/38] s390x: Fix cpu normal reset ri clearing +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +As it turns out we need to 
clear the ri controls and PSW enablement +bit to be architecture compliant. + +Signed-off-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Message-Id: <20191203132813.2734-4-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit e893baee70149896d1e43e341da4d6c614037d5d) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.c | 7 ++++++- + target/s390x/cpu.h | 7 ++++++- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index bd39cb54b7..99ea09085a 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -100,7 +100,7 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + case S390_CPU_RESET_INITIAL: + /* initial reset does not clear everything! */ + memset(&env->start_initial_reset_fields, 0, +- offsetof(CPUS390XState, end_reset_fields) - ++ offsetof(CPUS390XState, start_normal_reset_fields) - + offsetof(CPUS390XState, start_initial_reset_fields)); + + /* architectured initial value for Breaking-Event-Address register */ +@@ -123,6 +123,11 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + &env->fpu_status); + /* fall through */ + case S390_CPU_RESET_NORMAL: ++ env->psw.mask &= ~PSW_MASK_RI; ++ memset(&env->start_normal_reset_fields, 0, ++ offsetof(CPUS390XState, end_reset_fields) - ++ offsetof(CPUS390XState, start_normal_reset_fields)); ++ + env->pfault_token = -1UL; + env->bpbc = false; + break; +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index d2af13b345..7e1c18d596 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -58,7 +58,6 @@ struct CPUS390XState { + */ + uint64_t vregs[32][2] QEMU_ALIGNED(16); /* vector registers */ + uint32_t aregs[16]; /* access registers */ +- uint8_t riccb[64]; /* runtime instrumentation control */ + uint64_t gscb[4]; /* guarded storage control */ + uint64_t etoken; /* etoken */ + uint64_t etoken_extension; /* etoken extension */ +@@ -114,6 +113,10 @@ struct CPUS390XState { + uint64_t 
gbea; + uint64_t pp; + ++ /* Fields up to this point are not cleared by normal CPU reset */ ++ struct {} start_normal_reset_fields; ++ uint8_t riccb[64]; /* runtime instrumentation control */ ++ + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + +@@ -252,6 +255,7 @@ extern const VMStateDescription vmstate_s390_cpu; + #undef PSW_SHIFT_ASC + #undef PSW_MASK_CC + #undef PSW_MASK_PM ++#undef PSW_MASK_RI + #undef PSW_SHIFT_MASK_PM + #undef PSW_MASK_64 + #undef PSW_MASK_32 +@@ -273,6 +277,7 @@ extern const VMStateDescription vmstate_s390_cpu; + #define PSW_MASK_CC 0x0000300000000000ULL + #define PSW_MASK_PM 0x00000F0000000000ULL + #define PSW_SHIFT_MASK_PM 40 ++#define PSW_MASK_RI 0x0000008000000000ULL + #define PSW_MASK_64 0x0000000100000000ULL + #define PSW_MASK_32 0x0000000080000000ULL + #define PSW_MASK_ESA_ADDR 0x000000007fffffffULL +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Move-clear-reset.patch b/SOURCES/kvm-s390x-Move-clear-reset.patch new file mode 100644 index 0000000..7c1614c --- /dev/null +++ b/SOURCES/kvm-s390x-Move-clear-reset.patch @@ -0,0 +1,146 @@ +From f268cc7071ecb4322c03f3183acbcf90421da3c7 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:48 -0400 +Subject: [PATCH 06/42] s390x: Move clear reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-7-thuth@redhat.com> +Patchwork-id: 97019 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 06/38] s390x: Move clear reset +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's also move the clear reset function into the reset handler. 
+ +Signed-off-by: Janosch Frank +Message-Id: <20191127175046.4911-5-frankja@linux.ibm.com> +Reviewed-by: David Hildenbrand +Reviewed-by: Thomas Huth +Signed-off-by: Cornelia Huck +(cherry picked from commit eb8adcc3e9e3b8405c104ede72cf9f3bb2a5e226) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu-qom.h | 1 + + target/s390x/cpu.c | 58 +++++++++++++----------------------------- + 2 files changed, 18 insertions(+), 41 deletions(-) + +diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h +index 6f0a12042e..dbe5346ec9 100644 +--- a/target/s390x/cpu-qom.h ++++ b/target/s390x/cpu-qom.h +@@ -37,6 +37,7 @@ typedef struct S390CPUDef S390CPUDef; + typedef enum cpu_reset_type { + S390_CPU_RESET_NORMAL, + S390_CPU_RESET_INITIAL, ++ S390_CPU_RESET_CLEAR, + } cpu_reset_type; + + /** +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index ca62fe7685..bd39cb54b7 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -94,6 +94,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); + + switch (type) { ++ case S390_CPU_RESET_CLEAR: ++ memset(env, 0, offsetof(CPUS390XState, start_initial_reset_fields)); ++ /* fall through */ + case S390_CPU_RESET_INITIAL: + /* initial reset does not clear everything! 
*/ + memset(&env->start_initial_reset_fields, 0, +@@ -107,6 +110,14 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + env->cregs[0] = CR0_RESET; + env->cregs[14] = CR14_RESET; + ++#if defined(CONFIG_USER_ONLY) ++ /* user mode should always be allowed to use the full FPU */ ++ env->cregs[0] |= CR0_AFP; ++ if (s390_has_feat(S390_FEAT_VECTOR)) { ++ env->cregs[0] |= CR0_VECTOR; ++ } ++#endif ++ + /* tininess for underflow is detected before rounding */ + set_float_detect_tininess(float_tininess_before_rounding, + &env->fpu_status); +@@ -125,46 +136,6 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + } + } + +-/* CPUClass:reset() */ +-static void s390_cpu_full_reset(CPUState *s) +-{ +- S390CPU *cpu = S390_CPU(s); +- S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); +- CPUS390XState *env = &cpu->env; +- +- scc->parent_reset(s); +- cpu->env.sigp_order = 0; +- s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); +- +- memset(env, 0, offsetof(CPUS390XState, end_reset_fields)); +- +- /* architectured initial values for CR 0 and 14 */ +- env->cregs[0] = CR0_RESET; +- env->cregs[14] = CR14_RESET; +- +-#if defined(CONFIG_USER_ONLY) +- /* user mode should always be allowed to use the full FPU */ +- env->cregs[0] |= CR0_AFP; +- if (s390_has_feat(S390_FEAT_VECTOR)) { +- env->cregs[0] |= CR0_VECTOR; +- } +-#endif +- +- /* architectured initial value for Breaking-Event-Address register */ +- env->gbea = 1; +- +- env->pfault_token = -1UL; +- +- /* tininess for underflow is detected before rounding */ +- set_float_detect_tininess(float_tininess_before_rounding, +- &env->fpu_status); +- +- /* Reset state inside the kernel that we cannot access yet from QEMU. 
*/ +- if (kvm_enabled()) { +- kvm_s390_reset_vcpu(cpu); +- } +-} +- + #if !defined(CONFIG_USER_ONLY) + static void s390_cpu_machine_reset_cb(void *opaque) + { +@@ -456,6 +427,11 @@ static Property s390x_cpu_properties[] = { + DEFINE_PROP_END_OF_LIST() + }; + ++static void s390_cpu_reset_full(CPUState *s) ++{ ++ return s390_cpu_reset(s, S390_CPU_RESET_CLEAR); ++} ++ + static void s390_cpu_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *scc = S390_CPU_CLASS(oc); +@@ -472,7 +448,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) + scc->load_normal = s390_cpu_load_normal; + #endif + scc->reset = s390_cpu_reset; +- cc->reset = s390_cpu_full_reset; ++ cc->reset = s390_cpu_reset_full; + cc->class_by_name = s390_cpu_class_by_name, + cc->has_work = s390_cpu_has_work; + #ifdef CONFIG_TCG +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch b/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch new file mode 100644 index 0000000..ac183cf --- /dev/null +++ b/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch @@ -0,0 +1,83 @@ +From c9eee8aeed39976293e0d857039fcf729b821e83 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:03 -0400 +Subject: [PATCH 21/42] s390x: Move diagnose 308 subcodes and rcs into ipl.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-22-thuth@redhat.com> +Patchwork-id: 97032 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 21/38] s390x: Move diagnose 308 subcodes and rcs into ipl.h +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +They are part of the IPL process, so let's put them into the ipl +header. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Cornelia Huck +Reviewed-by: Christian Borntraeger +Reviewed-by: David Hildenbrand +Message-Id: <20200319131921.2367-2-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 9b39d29470e9dbef24ee842a44ea56bd92b855ea) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/ipl.h | 11 +++++++++++ + target/s390x/diag.c | 11 ----------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index 3e44abe1c6..a5665e6bfd 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -159,6 +159,17 @@ struct S390IPLState { + typedef struct S390IPLState S390IPLState; + QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); + ++#define DIAG_308_RC_OK 0x0001 ++#define DIAG_308_RC_NO_CONF 0x0102 ++#define DIAG_308_RC_INVALID 0x0402 ++ ++#define DIAG308_RESET_MOD_CLR 0 ++#define DIAG308_RESET_LOAD_NORM 1 ++#define DIAG308_LOAD_CLEAR 3 ++#define DIAG308_LOAD_NORMAL_DUMP 4 ++#define DIAG308_SET 5 ++#define DIAG308_STORE 6 ++ + #define S390_IPL_TYPE_FCP 0x00 + #define S390_IPL_TYPE_CCW 0x02 + #define S390_IPL_TYPE_QEMU_SCSI 0xff +diff --git a/target/s390x/diag.c b/target/s390x/diag.c +index 54e5670b3f..8aba6341f9 100644 +--- a/target/s390x/diag.c ++++ b/target/s390x/diag.c +@@ -49,17 +49,6 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) + return diag288_class->handle_timer(diag288, func, timeout); + } + +-#define DIAG_308_RC_OK 0x0001 +-#define DIAG_308_RC_NO_CONF 0x0102 +-#define DIAG_308_RC_INVALID 0x0402 +- +-#define DIAG308_RESET_MOD_CLR 0 +-#define DIAG308_RESET_LOAD_NORM 1 +-#define DIAG308_LOAD_CLEAR 3 +-#define DIAG308_LOAD_NORMAL_DUMP 4 +-#define DIAG308_SET 5 +-#define DIAG308_STORE 6 +- + static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, + uintptr_t ra, bool write) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Move-initial-reset.patch b/SOURCES/kvm-s390x-Move-initial-reset.patch new file 
mode 100644 index 0000000..0f2e9ab --- /dev/null +++ b/SOURCES/kvm-s390x-Move-initial-reset.patch @@ -0,0 +1,159 @@ +From 0d1c0adf25a323be0663863ebe44a6aefb5f7baf Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:47 -0400 +Subject: [PATCH 05/42] s390x: Move initial reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-6-thuth@redhat.com> +Patchwork-id: 97024 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 05/38] s390x: Move initial reset +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's move the initial reset into the reset handler and cleanup +afterwards. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Message-Id: <20191128083723.11937-1-frankja@linux.ibm.com> +Reviewed-by: Thomas Huth +Signed-off-by: Cornelia Huck +(cherry picked from commit 81b9222358e5c8f666f0d86057c75e40531d804c) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/s390x/cpu-qom.h | 2 +- + target/s390x/cpu.c | 46 +++++++++++++++++------------------------- + target/s390x/cpu.h | 2 +- + target/s390x/sigp.c | 2 +- + 4 files changed, 21 insertions(+), 31 deletions(-) + +diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h +index f3b71bac67..6f0a12042e 100644 +--- a/target/s390x/cpu-qom.h ++++ b/target/s390x/cpu-qom.h +@@ -36,6 +36,7 @@ typedef struct S390CPUDef S390CPUDef; + + typedef enum cpu_reset_type { + S390_CPU_RESET_NORMAL, ++ S390_CPU_RESET_INITIAL, + } cpu_reset_type; + + /** +@@ -62,7 +63,6 @@ typedef struct S390CPUClass { + void (*parent_reset)(CPUState *cpu); + void (*load_normal)(CPUState *cpu); + void (*reset)(CPUState *cpu, cpu_reset_type type); +- void (*initial_cpu_reset)(CPUState *cpu); + } S390CPUClass; + + typedef struct S390CPU S390CPU; +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 67d6fbfa44..ca62fe7685 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -94,6 +94,23 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); + + switch (type) { ++ case S390_CPU_RESET_INITIAL: ++ /* initial reset does not clear everything! 
*/ ++ memset(&env->start_initial_reset_fields, 0, ++ offsetof(CPUS390XState, end_reset_fields) - ++ offsetof(CPUS390XState, start_initial_reset_fields)); ++ ++ /* architectured initial value for Breaking-Event-Address register */ ++ env->gbea = 1; ++ ++ /* architectured initial values for CR 0 and 14 */ ++ env->cregs[0] = CR0_RESET; ++ env->cregs[14] = CR14_RESET; ++ ++ /* tininess for underflow is detected before rounding */ ++ set_float_detect_tininess(float_tininess_before_rounding, ++ &env->fpu_status); ++ /* fall through */ + case S390_CPU_RESET_NORMAL: + env->pfault_token = -1UL; + env->bpbc = false; +@@ -101,35 +118,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + default: + g_assert_not_reached(); + } +-} +- +-/* S390CPUClass::initial_reset() */ +-static void s390_cpu_initial_reset(CPUState *s) +-{ +- S390CPU *cpu = S390_CPU(s); +- CPUS390XState *env = &cpu->env; +- +- s390_cpu_reset(s, S390_CPU_RESET_NORMAL); +- /* initial reset does not clear everything! */ +- memset(&env->start_initial_reset_fields, 0, +- offsetof(CPUS390XState, end_reset_fields) - +- offsetof(CPUS390XState, start_initial_reset_fields)); +- +- /* architectured initial values for CR 0 and 14 */ +- env->cregs[0] = CR0_RESET; +- env->cregs[14] = CR14_RESET; +- +- /* architectured initial value for Breaking-Event-Address register */ +- env->gbea = 1; +- +- env->pfault_token = -1UL; +- +- /* tininess for underflow is detected before rounding */ +- set_float_detect_tininess(float_tininess_before_rounding, +- &env->fpu_status); + + /* Reset state inside the kernel that we cannot access yet from QEMU. 
*/ +- if (kvm_enabled()) { ++ if (kvm_enabled() && type != S390_CPU_RESET_NORMAL) { + kvm_s390_reset_vcpu(cpu); + } + } +@@ -481,7 +472,6 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) + scc->load_normal = s390_cpu_load_normal; + #endif + scc->reset = s390_cpu_reset; +- scc->initial_cpu_reset = s390_cpu_initial_reset; + cc->reset = s390_cpu_full_reset; + cc->class_by_name = s390_cpu_class_by_name, + cc->has_work = s390_cpu_has_work; +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 18123dfd5b..d2af13b345 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -748,7 +748,7 @@ static inline void s390_do_cpu_initial_reset(CPUState *cs, run_on_cpu_data arg) + { + S390CPUClass *scc = S390_CPU_GET_CLASS(cs); + +- scc->initial_cpu_reset(cs); ++ scc->reset(cs, S390_CPU_RESET_INITIAL); + } + + static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg) +diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c +index 850139b9cd..727875bb4a 100644 +--- a/target/s390x/sigp.c ++++ b/target/s390x/sigp.c +@@ -254,7 +254,7 @@ static void sigp_initial_cpu_reset(CPUState *cs, run_on_cpu_data arg) + SigpInfo *si = arg.host_ptr; + + cpu_synchronize_state(cs); +- scc->initial_cpu_reset(cs); ++ scc->reset(cs, S390_CPU_RESET_INITIAL); + cpu_synchronize_post_reset(cs); + si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch b/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch new file mode 100644 index 0000000..81a4368 --- /dev/null +++ b/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch @@ -0,0 +1,145 @@ +From 53b5a7f83f3e6b94c66cbbb97ea42bbf02cb96b4 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:46 -0400 +Subject: [PATCH 04/42] s390x: Move reset normal to shared reset handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth 
+Message-id: <20200529055420.16855-5-thuth@redhat.com> +Patchwork-id: 97018 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 04/38] s390x: Move reset normal to shared reset handler +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's start moving the cpu reset functions into a single function with +a switch/case, so we can later use fallthroughs and share more code +between resets. + +This patch introduces the reset function by renaming cpu_reset(). + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Message-Id: <20191127175046.4911-3-frankja@linux.ibm.com> +Reviewed-by: Thomas Huth +Signed-off-by: Cornelia Huck +(cherry picked from commit eac4f82791f1807c423e85670837db103b9d59b3) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu-qom.h | 6 +++++- + target/s390x/cpu.c | 19 +++++++++++++------ + target/s390x/cpu.h | 2 +- + target/s390x/sigp.c | 2 +- + 4 files changed, 20 insertions(+), 9 deletions(-) + +diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h +index b809ec8418..f3b71bac67 100644 +--- a/target/s390x/cpu-qom.h ++++ b/target/s390x/cpu-qom.h +@@ -34,6 +34,10 @@ + typedef struct S390CPUModel S390CPUModel; + typedef struct S390CPUDef S390CPUDef; + ++typedef enum cpu_reset_type { ++ S390_CPU_RESET_NORMAL, ++} cpu_reset_type; ++ + /** + * S390CPUClass: + * @parent_realize: The parent class' realize handler. 
+@@ -57,7 +61,7 @@ typedef struct S390CPUClass { + DeviceRealize parent_realize; + void (*parent_reset)(CPUState *cpu); + void (*load_normal)(CPUState *cpu); +- void (*cpu_reset)(CPUState *cpu); ++ void (*reset)(CPUState *cpu, cpu_reset_type type); + void (*initial_cpu_reset)(CPUState *cpu); + } S390CPUClass; + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 3abe7e80fd..67d6fbfa44 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -82,18 +82,25 @@ static void s390_cpu_load_normal(CPUState *s) + } + #endif + +-/* S390CPUClass::cpu_reset() */ +-static void s390_cpu_reset(CPUState *s) ++/* S390CPUClass::reset() */ ++static void s390_cpu_reset(CPUState *s, cpu_reset_type type) + { + S390CPU *cpu = S390_CPU(s); + S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); + CPUS390XState *env = &cpu->env; + +- env->pfault_token = -1UL; +- env->bpbc = false; + scc->parent_reset(s); + cpu->env.sigp_order = 0; + s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); ++ ++ switch (type) { ++ case S390_CPU_RESET_NORMAL: ++ env->pfault_token = -1UL; ++ env->bpbc = false; ++ break; ++ default: ++ g_assert_not_reached(); ++ } + } + + /* S390CPUClass::initial_reset() */ +@@ -102,7 +109,7 @@ static void s390_cpu_initial_reset(CPUState *s) + S390CPU *cpu = S390_CPU(s); + CPUS390XState *env = &cpu->env; + +- s390_cpu_reset(s); ++ s390_cpu_reset(s, S390_CPU_RESET_NORMAL); + /* initial reset does not clear everything! 
*/ + memset(&env->start_initial_reset_fields, 0, + offsetof(CPUS390XState, end_reset_fields) - +@@ -473,7 +480,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) + #if !defined(CONFIG_USER_ONLY) + scc->load_normal = s390_cpu_load_normal; + #endif +- scc->cpu_reset = s390_cpu_reset; ++ scc->reset = s390_cpu_reset; + scc->initial_cpu_reset = s390_cpu_initial_reset; + cc->reset = s390_cpu_full_reset; + cc->class_by_name = s390_cpu_class_by_name, +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 17460ed7b3..18123dfd5b 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -741,7 +741,7 @@ static inline void s390_do_cpu_reset(CPUState *cs, run_on_cpu_data arg) + { + S390CPUClass *scc = S390_CPU_GET_CLASS(cs); + +- scc->cpu_reset(cs); ++ scc->reset(cs, S390_CPU_RESET_NORMAL); + } + + static inline void s390_do_cpu_initial_reset(CPUState *cs, run_on_cpu_data arg) +diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c +index 2ce22d4dc1..850139b9cd 100644 +--- a/target/s390x/sigp.c ++++ b/target/s390x/sigp.c +@@ -266,7 +266,7 @@ static void sigp_cpu_reset(CPUState *cs, run_on_cpu_data arg) + SigpInfo *si = arg.host_ptr; + + cpu_synchronize_state(cs); +- scc->cpu_reset(cs); ++ scc->reset(cs, S390_CPU_RESET_NORMAL); + cpu_synchronize_post_reset(cs); + si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch b/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch new file mode 100644 index 0000000..9447240 --- /dev/null +++ b/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch @@ -0,0 +1,70 @@ +From 7171a794e8a7d91805516174187addc3b8e6b423 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:54 -0400 +Subject: [PATCH 12/42] s390x: Properly fetch and test the short psw on diag308 + subc 0/1 + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-13-thuth@redhat.com> +Patchwork-id: 97025 +O-Subject: 
[RHEL-8.3.0 qemu-kvm PATCH v2 12/38] s390x: Properly fetch and test the short psw on diag308 subc 0/1 +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +We need to actually fetch the cpu mask and set it. As we invert the +short psw indication in the mask, SIE will report a specification +exception, if it wasn't present in the reset psw. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Message-Id: <20191129142025.21453-2-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 104130cb7c106378dab944397c6a455c4a6d552f) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.c | 12 ++++++++++-- + target/s390x/cpu.h | 1 + + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 99ea09085a..625daeedd1 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -76,8 +76,16 @@ static bool s390_cpu_has_work(CPUState *cs) + static void s390_cpu_load_normal(CPUState *s) + { + S390CPU *cpu = S390_CPU(s); +- cpu->env.psw.addr = ldl_phys(s->as, 4) & PSW_MASK_ESA_ADDR; +- cpu->env.psw.mask = PSW_MASK_32 | PSW_MASK_64; ++ uint64_t spsw = ldq_phys(s->as, 0); ++ ++ cpu->env.psw.mask = spsw & 0xffffffff80000000ULL; ++ /* ++ * Invert short psw indication, so SIE will report a specification ++ * exception if it was not set. 
++ */ ++ cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; ++ cpu->env.psw.addr = spsw & 0x7fffffffULL; ++ + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); + } + #endif +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 7e1c18d596..7f5fa1d35b 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -269,6 +269,7 @@ extern const VMStateDescription vmstate_s390_cpu; + #define PSW_MASK_EXT 0x0100000000000000ULL + #define PSW_MASK_KEY 0x00F0000000000000ULL + #define PSW_SHIFT_KEY 52 ++#define PSW_MASK_SHORTPSW 0x0008000000000000ULL + #define PSW_MASK_MCHECK 0x0004000000000000ULL + #define PSW_MASK_WAIT 0x0002000000000000ULL + #define PSW_MASK_PSTATE 0x0001000000000000ULL +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch b/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch new file mode 100644 index 0000000..b1c7e01 --- /dev/null +++ b/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch @@ -0,0 +1,87 @@ +From 4bd5ae889376816238ecad1bce054b0e198cde2b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:55 -0400 +Subject: [PATCH 13/42] s390x: Rename and use constants for short PSW address + and mask + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-14-thuth@redhat.com> +Patchwork-id: 97050 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 13/38] s390x: Rename and use constants for short PSW address and mask +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's rename PSW_MASK_ESA_ADDR to PSW_MASK_SHORT_ADDR because we're +not working with a ESA PSW which would not support the extended +addressing bit. Also let's actually use it. + +Additionally we introduce PSW_MASK_SHORT_CTRL and use it throughout +the codebase. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Reviewed-by: David Hildenbrand +Message-Id: <20200227092341.38558-1-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit b6c2dbd7214b0b2396e1dcf9668c8b48ab571115) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/ipl.c | 2 +- + target/s390x/cpu.c | 4 ++-- + target/s390x/cpu.h | 3 ++- + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index ca544d64c5..0b7548a549 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -179,7 +179,7 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) + /* if not Linux load the address of the (short) IPL PSW */ + ipl_psw = rom_ptr(4, 4); + if (ipl_psw) { +- pentry = be32_to_cpu(*ipl_psw) & 0x7fffffffUL; ++ pentry = be32_to_cpu(*ipl_psw) & PSW_MASK_SHORT_ADDR; + } else { + error_setg(&err, "Could not get IPL PSW"); + goto error; +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 625daeedd1..e538a4a3e2 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -78,13 +78,13 @@ static void s390_cpu_load_normal(CPUState *s) + S390CPU *cpu = S390_CPU(s); + uint64_t spsw = ldq_phys(s->as, 0); + +- cpu->env.psw.mask = spsw & 0xffffffff80000000ULL; ++ cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; + /* + * Invert short psw indication, so SIE will report a specification + * exception if it was not set. 
+ */ + cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; +- cpu->env.psw.addr = spsw & 0x7fffffffULL; ++ cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; + + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); + } +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 7f5fa1d35b..1ff84e6b3a 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -281,7 +281,8 @@ extern const VMStateDescription vmstate_s390_cpu; + #define PSW_MASK_RI 0x0000008000000000ULL + #define PSW_MASK_64 0x0000000100000000ULL + #define PSW_MASK_32 0x0000000080000000ULL +-#define PSW_MASK_ESA_ADDR 0x000000007fffffffULL ++#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL ++#define PSW_MASK_SHORT_CTRL 0xffffffff80000000ULL + + #undef PSW_ASC_PRIMARY + #undef PSW_ASC_ACCREG +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch b/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch new file mode 100644 index 0000000..8ce7625 --- /dev/null +++ b/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch @@ -0,0 +1,119 @@ +From 04d4e7eda95316b64ea9dc0f4ca8801d531652e7 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:41 -0400 +Subject: [PATCH 07/12] s390x/css: Refactor the css_queue_crw() routine + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-8-cohuck@redhat.com> +Patchwork-id: 97700 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 7/9] s390x/css: Refactor the css_queue_crw() routine +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Eric Farman + +We have a use case (vfio-ccw) where a CRW is already built and +ready to use. Rather than teasing out the components just to +reassemble it later, let's rework this code so we can queue a +fully-qualified CRW directly. 
+ +Signed-off-by: Eric Farman +Reviewed-by: Cornelia Huck +Message-Id: <20200505125757.98209-6-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit f6dde1b012e678aa64339520ef7519ec04026cf1) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/css.c | 44 ++++++++++++++++++++++++++++-------------- + include/hw/s390x/css.h | 1 + + 2 files changed, 30 insertions(+), 15 deletions(-) + +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index 71fd3f9a00..a8de8a0c84 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -2170,30 +2170,23 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, + } + } + +-void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, +- int chain, uint16_t rsid) ++void css_crw_add_to_queue(CRW crw) + { + CrwContainer *crw_cont; + +- trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : ""); ++ trace_css_crw((crw.flags & CRW_FLAGS_MASK_RSC) >> 8, ++ crw.flags & CRW_FLAGS_MASK_ERC, ++ crw.rsid, ++ (crw.flags & CRW_FLAGS_MASK_C) ? "(chained)" : ""); ++ + /* TODO: Maybe use a static crw pool? 
*/ + crw_cont = g_try_new0(CrwContainer, 1); + if (!crw_cont) { + channel_subsys.crws_lost = true; + return; + } +- crw_cont->crw.flags = (rsc << 8) | erc; +- if (solicited) { +- crw_cont->crw.flags |= CRW_FLAGS_MASK_S; +- } +- if (chain) { +- crw_cont->crw.flags |= CRW_FLAGS_MASK_C; +- } +- crw_cont->crw.rsid = rsid; +- if (channel_subsys.crws_lost) { +- crw_cont->crw.flags |= CRW_FLAGS_MASK_R; +- channel_subsys.crws_lost = false; +- } ++ ++ crw_cont->crw = crw; + + QTAILQ_INSERT_TAIL(&channel_subsys.pending_crws, crw_cont, sibling); + +@@ -2204,6 +2197,27 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, + } + } + ++void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, ++ int chain, uint16_t rsid) ++{ ++ CRW crw; ++ ++ crw.flags = (rsc << 8) | erc; ++ if (solicited) { ++ crw.flags |= CRW_FLAGS_MASK_S; ++ } ++ if (chain) { ++ crw.flags |= CRW_FLAGS_MASK_C; ++ } ++ crw.rsid = rsid; ++ if (channel_subsys.crws_lost) { ++ crw.flags |= CRW_FLAGS_MASK_R; ++ channel_subsys.crws_lost = false; ++ } ++ ++ css_crw_add_to_queue(crw); ++} ++ + void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, + int hotplugged, int add) + { +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index 7e3a5e7433..08c869ab0a 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -205,6 +205,7 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src); + void css_inject_io_interrupt(SubchDev *sch); + void css_reset(void); + void css_reset_sch(SubchDev *sch); ++void css_crw_add_to_queue(CRW crw); + void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, + int chain, uint16_t rsid); + void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch b/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch new file mode 100644 index 0000000..8b9294e --- /dev/null +++ 
b/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch @@ -0,0 +1,82 @@ +From 536b6081c0739bebbb33583370f62116d0cb42da Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:01 -0400 +Subject: [PATCH 19/42] s390x: ipl: Consolidate iplb validity check into one + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-20-thuth@redhat.com> +Patchwork-id: 97038 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 19/38] s390x: ipl: Consolidate iplb validity check into one function +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +It's nicer to just call one function than calling a function for each +possible iplb type. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Message-Id: <20200310090950.61172-1-frankja@linux.ibm.com> +Reviewed-by: Christian Borntraeger +Signed-off-by: Christian Borntraeger +(cherry picked from commit 94c21436e5a89143f8b9cb4d089d1a2f3f4fd377) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/ipl.h | 18 +++++++++--------- + target/s390x/diag.c | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index d4813105db..3e44abe1c6 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -173,16 +173,16 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb) + return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); + } + +-static inline bool iplb_valid_ccw(IplParameterBlock *iplb) ++static inline bool iplb_valid(IplParameterBlock *iplb) + { +- return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && +- iplb->pbt == S390_IPL_TYPE_CCW; +-} +- +-static inline bool iplb_valid_fcp(IplParameterBlock *iplb) +-{ +- return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && +- iplb->pbt == S390_IPL_TYPE_FCP; ++ switch (iplb->pbt) { ++ case S390_IPL_TYPE_FCP: ++ return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN; ++ case S390_IPL_TYPE_CCW: ++ return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN; ++ default: ++ return false; ++ } + } + + #endif +diff --git a/target/s390x/diag.c b/target/s390x/diag.c +index b5aec06d6b..54e5670b3f 100644 +--- a/target/s390x/diag.c ++++ b/target/s390x/diag.c +@@ -117,7 +117,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + + cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); + +- if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb)) { ++ if (!iplb_valid(iplb)) { + env->regs[r1 + 1] = DIAG_308_RC_INVALID; + goto out; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch b/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch new file mode 100644 index 0000000..9882324 --- /dev/null +++ b/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch @@ -0,0 +1,83 @@ +From 999cf62d870ff9aa8e9609fcbbcefef9ae1aceb6 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:50 -0400 +Subject: [PATCH 08/42] s390x: kvm: Make kvm_sclp_service_call void 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-9-thuth@redhat.com> +Patchwork-id: 97030 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 08/38] s390x: kvm: Make kvm_sclp_service_call void +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +It defaults to returning 0 anyway and that return value is not +necessary, as 0 is also the default rc that the caller would return. + +While doing that we can simplify the logic a bit and return early if +we inject a PGM exception. + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Message-Id: <20191129091713.4582-1-frankja@linux.ibm.com> +Reviewed-by: David Hildenbrand +Signed-off-by: Cornelia Huck +(cherry picked from commit 15b6c0370c3e2774fd9ffda5c10c6e36952e8eb6) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/kvm.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index a02d569537..1c5bc7a2f9 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -1159,13 +1159,13 @@ void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code) + kvm_s390_vcpu_interrupt(cpu, &irq); + } + +-static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, ++static void kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, + uint16_t ipbh0) + { + CPUS390XState *env = &cpu->env; + uint64_t sccb; + uint32_t code; +- int r = 0; ++ int r; + + sccb = env->regs[ipbh0 & 0xf]; + code = env->regs[(ipbh0 & 0xf0) >> 4]; +@@ -1173,11 +1173,9 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, + r = sclp_service_call(env, sccb, code); + if (r < 0) { + kvm_s390_program_interrupt(cpu, -r); +- } else { +- setcc(cpu, r); ++ return; + } +- +- return 0; ++ setcc(cpu, r); + } + + static int 
handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) +@@ -1240,7 +1238,7 @@ static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) + setcc(cpu, 3); + break; + case PRIV_B2_SCLP_CALL: +- rc = kvm_sclp_service_call(cpu, run, ipbh0); ++ kvm_sclp_service_call(cpu, run, ipbh0); + break; + default: + rc = -1; +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch b/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch new file mode 100644 index 0000000..056f8d5 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch @@ -0,0 +1,79 @@ +From 0ba8d4ea1cc34230356cc446dfa8d1cb52cbd2f3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:05 -0400 +Subject: [PATCH 23/42] s390x: protvirt: Add migration blocker + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-24-thuth@redhat.com> +Patchwork-id: 97043 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 23/38] s390x: protvirt: Add migration blocker +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Migration is not yet supported. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-5-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 0141e1b47707d90f5bd9d252da064ebdaca698a6) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/s390-virtio-ccw.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 82da1d9ab5..dbd5125232 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -44,6 +44,9 @@ + #include "sysemu/sysemu.h" + #include "hw/s390x/pv.h" + #include ++#include "migration/blocker.h" ++ ++static Error *pv_mig_blocker; + + S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) + { +@@ -325,15 +328,30 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) + { + s390_pv_vm_disable(); + ms->pv = false; ++ migrate_del_blocker(pv_mig_blocker); ++ error_free_or_abort(&pv_mig_blocker); + } + + static int s390_machine_protect(S390CcwMachineState *ms) + { ++ Error *local_err = NULL; + int rc; + ++ error_setg(&pv_mig_blocker, ++ "protected VMs are currently not migrateable."); ++ rc = migrate_add_blocker(pv_mig_blocker, &local_err); ++ if (rc) { ++ error_report_err(local_err); ++ error_free_or_abort(&pv_mig_blocker); ++ return rc; ++ } ++ + /* Create SE VM */ + rc = s390_pv_vm_enable(); + if (rc) { ++ error_report_err(local_err); ++ migrate_del_blocker(pv_mig_blocker); ++ error_free_or_abort(&pv_mig_blocker); + return rc; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch b/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch new file mode 100644 index 0000000..0cf75b0 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch @@ -0,0 +1,135 @@ +From 1cfcff169f392179258e4535e60d4ef9cabae3c6 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:13 -0400 +Subject: [PATCH 31/42] s390x: protvirt: Disable address checks for PV guest IO + emulation + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-32-thuth@redhat.com> +Patchwork-id: 97044 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 31/38] s390x: protvirt: Disable address checks for PV guest 
IO emulation +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +IO instruction data is routed through SIDAD for protected guests, so +addresses do not need to be checked, as this is kernel memory which is +always available. + +Also the instruction data always starts at offset 0 of the SIDAD. + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-13-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit c10b708752e5264a85b5c3afa0a0ccfcf6503ddf) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/ioinst.c | 35 ++++++++++++++++++++++++++++------- + 1 file changed, 28 insertions(+), 7 deletions(-) + +diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c +index c437a1d8c6..bbcccf6be2 100644 +--- a/target/s390x/ioinst.c ++++ b/target/s390x/ioinst.c +@@ -16,6 +16,25 @@ + #include "hw/s390x/ioinst.h" + #include "trace.h" + #include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/pv.h" ++ ++/* All I/O instructions but chsc use the s format */ ++static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, ++ uint8_t *ar) ++{ ++ /* ++ * Addresses for protected guests are all offsets into the ++ * satellite block which holds the IO control structures. Those ++ * control structures are always starting at offset 0 and are ++ * always aligned and accessible. So we can return 0 here which ++ * will pass the following address checks. 
++ */ ++ if (s390_is_pv()) { ++ *ar = 0; ++ return 0; ++ } ++ return decode_basedisp_s(env, ipb, ar); ++} + + int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, + int *schid) +@@ -114,7 +133,7 @@ void ioinst_handle_msch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + CPUS390XState *env = &cpu->env; + uint8_t ar; + +- addr = decode_basedisp_s(env, ipb, &ar); ++ addr = get_address_from_regs(env, ipb, &ar); + if (addr & 3) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; +@@ -171,7 +190,7 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + CPUS390XState *env = &cpu->env; + uint8_t ar; + +- addr = decode_basedisp_s(env, ipb, &ar); ++ addr = get_address_from_regs(env, ipb, &ar); + if (addr & 3) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; +@@ -203,7 +222,7 @@ void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + CPUS390XState *env = &cpu->env; + uint8_t ar; + +- addr = decode_basedisp_s(env, ipb, &ar); ++ addr = get_address_from_regs(env, ipb, &ar); + if (addr & 3) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; +@@ -234,7 +253,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, + CPUS390XState *env = &cpu->env; + uint8_t ar; + +- addr = decode_basedisp_s(env, ipb, &ar); ++ addr = get_address_from_regs(env, ipb, &ar); + if (addr & 3) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; +@@ -303,7 +322,7 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + return -EIO; + } + trace_ioinst_sch_id("tsch", cssid, ssid, schid); +- addr = decode_basedisp_s(env, ipb, &ar); ++ addr = get_address_from_regs(env, ipb, &ar); + if (addr & 3) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return -EIO; +@@ -601,7 +620,7 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + { + ChscReq *req; + ChscResp *res; +- uint64_t addr; ++ uint64_t addr = 0; + 
int reg; + uint16_t len; + uint16_t command; +@@ -610,7 +629,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + + trace_ioinst("chsc"); + reg = (ipb >> 20) & 0x00f; +- addr = env->regs[reg]; ++ if (!s390_is_pv()) { ++ addr = env->regs[reg]; ++ } + /* Page boundary? */ + if (addr & 0xfff) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch b/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch new file mode 100644 index 0000000..9857f28 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch @@ -0,0 +1,55 @@ +From b54e5e6df5d5bbe4dc0a206be9f6b6d971ce6f43 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:17 -0400 +Subject: [PATCH 35/42] s390x: protvirt: Fix stray error_report_err in + s390_machine_protect +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-36-thuth@redhat.com> +Patchwork-id: 97042 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 35/38] s390x: protvirt: Fix stray error_report_err in s390_machine_protect +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +In case the protection of the machine fails at s390_pv_vm_enable(), +we'll currently report the local_error variable. Problem is that +there's no migration blocker error that we can report at this point so +the pointer is always NULL which leads to a SEGFAULT. + +Let's remove the error report. 
+ +Signed-off-by: Janosch Frank +Reported-by: Marc Hartmayer +Fixes: 0141e1b47707 ("s390x: protvirt: Add migration blocker") +Message-Id: <20200326140505.2432-1-frankja@linux.ibm.com> +Reviewed-by: David Hildenbrand +Signed-off-by: Cornelia Huck +(cherry picked from commit 7152c9ecc6530ea145c122b0a58cc28802f630c6) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index b4ebe83766..c08e42bda1 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -360,7 +360,6 @@ static int s390_machine_protect(S390CcwMachineState *ms) + rc = s390_pv_vm_enable(); + if (rc) { + qemu_balloon_inhibit(false); +- error_report_err(local_err); + migrate_del_blocker(pv_mig_blocker); + error_free_or_abort(&pv_mig_blocker); + return rc; +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch b/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch new file mode 100644 index 0000000..4d6a44b --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch @@ -0,0 +1,61 @@ +From 680154545d1f9d75fb33615b1900661e7d09be4e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:15 -0400 +Subject: [PATCH 33/42] s390x: protvirt: Handle SIGP store status correctly + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-34-thuth@redhat.com> +Patchwork-id: 97054 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 33/38] s390x: protvirt: Handle SIGP store status correctly +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +For protected VMs status storing is not done by QEMU anymore. 
+Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-15-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit f2a2d9a2bae8f6fdc5e9a40c1241e9428f15b4df) +[thuth: fixed contextual conflict due to missing commit 44eaccd091a7365fd37] +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/helper.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/target/s390x/helper.c b/target/s390x/helper.c +index 6808dfda01..36b6d3d9d1 100644 +--- a/target/s390x/helper.c ++++ b/target/s390x/helper.c +@@ -25,6 +25,7 @@ + #include "qemu/timer.h" + #include "qemu/qemu-print.h" + #include "hw/s390x/ioinst.h" ++#include "hw/s390x/pv.h" + #include "sysemu/hw_accel.h" + #include "sysemu/runstate.h" + #ifndef CONFIG_USER_ONLY +@@ -246,6 +247,11 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) + hwaddr len = sizeof(*sa); + int i; + ++ /* For PVMs storing will occur when this cpu enters SIE again */ ++ if (s390_is_pv()) { ++ return 0; ++ } ++ + sa = cpu_physical_memory_map(addr, &len, 1); + if (!sa) { + return -EFAULT; +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch b/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch new file mode 100644 index 0000000..a843d03 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch @@ -0,0 +1,104 @@ +From 095553f9dd1fec02869bf974e8cc07614d6587e5 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:06 -0400 +Subject: [PATCH 24/42] s390x: protvirt: Inhibit balloon when switching to + protected mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-25-thuth@redhat.com> +Patchwork-id: 
97036 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 24/38] s390x: protvirt: Inhibit balloon when switching to protected mode +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Ballooning in protected VMs can only be done when the guest shares the +pages it gives to the host. If pages are not shared, the integrity +checks will fail once those pages have been altered and are given back +to the guest. + +As we currently do not yet have a solution for this we will continue +like this: + +1. We block ballooning now in QEMU (with this patch). + +2. Later we will provide a change to virtio that removes the blocker +and adds VIRTIO_F_IOMMU_PLATFORM automatically by QEMU when doing the +protvirt switch. This is OK, as the balloon driver in Linux (the only +supported guest) will refuse to work with the IOMMU_PLATFORM feature +bit set. + +3. Later, we can fix the guest balloon driver to accept the IOMMU +feature bit and correctly exercise sharing and unsharing of balloon +pages. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-6-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit b1697f63fd8f8201b1447bb55f595830b9cbde31) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/s390-virtio-ccw.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index dbd5125232..b4ebe83766 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -42,6 +42,7 @@ + #include "hw/qdev-properties.h" + #include "hw/s390x/tod.h" + #include "sysemu/sysemu.h" ++#include "sysemu/balloon.h" + #include "hw/s390x/pv.h" + #include + #include "migration/blocker.h" +@@ -330,6 +331,7 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) + ms->pv = false; + migrate_del_blocker(pv_mig_blocker); + error_free_or_abort(&pv_mig_blocker); ++ qemu_balloon_inhibit(false); + } + + static int s390_machine_protect(S390CcwMachineState *ms) +@@ -337,10 +339,18 @@ static int s390_machine_protect(S390CcwMachineState *ms) + Error *local_err = NULL; + int rc; + ++ /* ++ * Ballooning on protected VMs needs support in the guest for ++ * sharing and unsharing balloon pages. Block ballooning for ++ * now, until we have a solution to make at least Linux guests ++ * either support it or fail gracefully. 
++ */ ++ qemu_balloon_inhibit(true); + error_setg(&pv_mig_blocker, + "protected VMs are currently not migrateable."); + rc = migrate_add_blocker(pv_mig_blocker, &local_err); + if (rc) { ++ qemu_balloon_inhibit(false); + error_report_err(local_err); + error_free_or_abort(&pv_mig_blocker); + return rc; +@@ -349,6 +359,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) + /* Create SE VM */ + rc = s390_pv_vm_enable(); + if (rc) { ++ qemu_balloon_inhibit(false); + error_report_err(local_err); + migrate_del_blocker(pv_mig_blocker); + error_free_or_abort(&pv_mig_blocker); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch b/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch new file mode 100644 index 0000000..2ac3d03 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch @@ -0,0 +1,75 @@ +From 10ed4f6ad687d98f0bfe06d75775e8c541da80a0 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:07 -0400 +Subject: [PATCH 25/42] s390x: protvirt: KVM intercept changes + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-26-thuth@redhat.com> +Patchwork-id: 97035 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 25/38] s390x: protvirt: KVM intercept changes +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Protected VMs no longer intercept with code 4 for an instruction +interception. Instead they have codes 104 and 108 for protected +instruction interception and protected instruction notification +respectively. + +The 104 mirrors the 4 interception. + +The 108 is a notification interception to let KVM and QEMU know that +something changed and we need to update tracking information or +perform specific tasks. 
It's currently taken for the following +instructions: + +* spx (To inform about the changed prefix location) +* sclp (On incorrect SCCB values, so we can inject an IRQ) +* sigp (All but "stop and store status") +* diag308 (Subcodes 0/1) + +Of these exits only sclp errors, state changing sigps and diag308 will +reach QEMU. QEMU will do its parts of the job, while the ultravisor +has done the instruction part of the job. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-7-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 2585e507ffa1da01b57dbea26b1e1fe507d27198) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/kvm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 9a0be13959..af50b2c253 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -115,6 +115,8 @@ + #define ICPT_CPU_STOP 0x28 + #define ICPT_OPEREXC 0x2c + #define ICPT_IO 0x40 ++#define ICPT_PV_INSTR 0x68 ++#define ICPT_PV_INSTR_NOTIFICATION 0x6c + + #define NR_LOCAL_IRQS 32 + /* +@@ -1695,6 +1697,8 @@ static int handle_intercept(S390CPU *cpu) + (long)cs->kvm_run->psw_addr); + switch (icpt_code) { + case ICPT_INSTRUCTION: ++ case ICPT_PV_INSTR: ++ case ICPT_PV_INSTR_NOTIFICATION: + r = handle_instruction(cpu, run); + break; + case ICPT_PROGRAM: +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch b/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch new file mode 100644 index 0000000..0609546 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch @@ -0,0 +1,171 @@ +From 8345b90f43b14435938fbbe0f3a510a60f5d0ded Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:14 -0400 +Subject: [PATCH 32/42] s390x: protvirt: Move IO control structures over SIDA + 
+RH-Author: Thomas Huth +Message-id: <20200529055420.16855-33-thuth@redhat.com> +Patchwork-id: 97040 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 32/38] s390x: protvirt: Move IO control structures over SIDA +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +For protected guests, we need to put the IO emulation results into the +SIDA, so SIE will write them into the guest at the next entry. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-14-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit fcc10c1470d6e9460ebcf4c30f5bbd37b921a041) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/ioinst.c | 61 +++++++++++++++++++++++++++++++------------ + 1 file changed, 45 insertions(+), 16 deletions(-) + +diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c +index bbcccf6be2..f40c35c6ff 100644 +--- a/target/s390x/ioinst.c ++++ b/target/s390x/ioinst.c +@@ -138,7 +138,9 @@ void ioinst_handle_msch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } +- if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_read(cpu, addr, &schib, sizeof(schib)); ++ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return; + } +@@ -195,7 +197,9 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } +- if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_read(cpu, addr, &orig_orb, sizeof(orb)); ++ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return; + } +@@ -231,14 +235,19 @@ void 
ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + cc = css_do_stcrw(&crw); + /* 0 - crw stored, 1 - zeroes stored */ + +- if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_write(cpu, addr, &crw, sizeof(crw)); + setcc(cpu, cc); + } else { +- if (cc == 0) { +- /* Write failed: requeue CRW since STCRW is suppressing */ +- css_undo_stcrw(&crw); ++ if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { ++ setcc(cpu, cc); ++ } else { ++ if (cc == 0) { ++ /* Write failed: requeue CRW since STCRW is suppressing */ ++ css_undo_stcrw(&crw); ++ } ++ s390_cpu_virt_mem_handle_exc(cpu, ra); + } +- s390_cpu_virt_mem_handle_exc(cpu, ra); + } + } + +@@ -260,6 +269,13 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, + } + + if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { ++ /* ++ * The Ultravisor checks schid bit 16 to be one and bits 0-12 ++ * to be 0 and injects a operand exception itself. ++ * ++ * Hence we should never end up here. 
++ */ ++ g_assert(!s390_is_pv()); + /* + * As operand exceptions have a lower priority than access exceptions, + * we check whether the memory area is writeable (injecting the +@@ -292,14 +308,17 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, + } + } + if (cc != 3) { +- if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, +- sizeof(schib)) != 0) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_write(cpu, addr, &schib, sizeof(schib)); ++ } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, ++ sizeof(schib)) != 0) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return; + } + } else { + /* Access exceptions have a higher priority than cc3 */ +- if (s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { ++ if (!s390_is_pv() && ++ s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return; + } +@@ -336,7 +355,9 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + } + /* 0 - status pending, 1 - not status pending, 3 - not operational */ + if (cc != 3) { +- if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_write(cpu, addr, &irb, irb_len); ++ } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return -EFAULT; + } +@@ -344,7 +365,8 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) + } else { + irb_len = sizeof(irb) - sizeof(irb.emw); + /* Access exceptions have a higher priority than cc3 */ +- if (s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { ++ if (!s390_is_pv() && ++ s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return -EFAULT; + } +@@ -642,7 +664,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + * present CHSC sub-handlers ... if we ever need more, we should take + * care of req->len here first. 
+ */ +- if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_read(cpu, addr, buf, sizeof(ChscReq)); ++ } else if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + return; + } +@@ -675,11 +699,16 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) + break; + } + +- if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, +- be16_to_cpu(res->len))) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_write(cpu, addr + len, res, be16_to_cpu(res->len)); + setcc(cpu, 0); /* Command execution complete */ + } else { +- s390_cpu_virt_mem_handle_exc(cpu, ra); ++ if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, ++ be16_to_cpu(res->len))) { ++ setcc(cpu, 0); /* Command execution complete */ ++ } else { ++ s390_cpu_virt_mem_handle_exc(cpu, ra); ++ } + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch b/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch new file mode 100644 index 0000000..1d60070 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch @@ -0,0 +1,70 @@ +From 27f5d8a3af2863e39b7c46a3128009988d772f15 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:09 -0400 +Subject: [PATCH 27/42] s390x: protvirt: Move STSI data over SIDAD + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-28-thuth@redhat.com> +Patchwork-id: 97046 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 27/38] s390x: protvirt: Move STSI data over SIDAD +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +For protected guests, we need to put the STSI emulation results into +the SIDA, so SIE will write them into the guest at the next entry. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-9-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 7c713b8acb70fb61f9650f8a7702dec546752bb6) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/kvm.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index f67bb5ce2c..6809a5ac40 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -50,6 +50,7 @@ + #include "exec/memattrs.h" + #include "hw/s390x/s390-virtio-ccw.h" + #include "hw/s390x/s390-virtio-hcall.h" ++#include "hw/s390x/pv.h" + + #ifndef DEBUG_KVM + #define DEBUG_KVM 0 +@@ -1803,7 +1804,9 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) + SysIB_322 sysib; + int del; + +- if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_read(cpu, 0, &sysib, sizeof(sysib)); ++ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { + return; + } + /* Shift the stack of Extended Names to prepare for our own data */ +@@ -1843,7 +1846,11 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) + /* Insert UUID */ + memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); + +- s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); ++ if (s390_is_pv()) { ++ s390_cpu_pv_mem_write(cpu, 0, &sysib, sizeof(sysib)); ++ } else { ++ s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); ++ } + } + + static int handle_stsi(S390CPU *cpu) +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch b/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch new file mode 100644 index 0000000..1b22719 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch @@ -0,0 +1,93 @@ +From 33d4e21cfd236aecd9e4dbe8228d058fd1f22400 Mon Sep 
17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:12 -0400 +Subject: [PATCH 30/42] s390x: protvirt: Move diag 308 data over SIDA + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-31-thuth@redhat.com> +Patchwork-id: 97048 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 30/38] s390x: protvirt: Move diag 308 data over SIDA +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +For protected guests the IPIB is written/read to/from the SIDA, so we +need those accesses to go through s390_cpu_pv_mem_read/write(). + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-12-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 9c61e11238cfa8f70e3eb90aac5d3e5646e5432f) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/diag.c | 25 ++++++++++++++++++++----- + 1 file changed, 20 insertions(+), 5 deletions(-) + +diff --git a/target/s390x/diag.c b/target/s390x/diag.c +index b2cbefb8cf..1a48429564 100644 +--- a/target/s390x/diag.c ++++ b/target/s390x/diag.c +@@ -75,6 +75,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + { + bool valid; + CPUState *cs = env_cpu(env); ++ S390CPU *cpu = S390_CPU(cs); + uint64_t addr = env->regs[r1]; + uint64_t subcode = env->regs[r3]; + IplParameterBlock *iplb; +@@ -111,13 +112,22 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + return; + } + iplb = g_new0(IplParameterBlock, 1); +- cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); ++ if (!s390_is_pv()) { ++ cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); ++ } else { ++ s390_cpu_pv_mem_read(cpu, 0, iplb, sizeof(iplb->len)); ++ } ++ + if (!iplb_valid_len(iplb)) { + env->regs[r1 + 1] = DIAG_308_RC_INVALID; + goto out; + } + +- 
cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); ++ if (!s390_is_pv()) { ++ cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); ++ } else { ++ s390_cpu_pv_mem_read(cpu, 0, iplb, be32_to_cpu(iplb->len)); ++ } + + valid = subcode == DIAG308_PV_SET ? iplb_valid_pv(iplb) : iplb_valid(iplb); + if (!valid) { +@@ -140,12 +150,17 @@ out: + } else { + iplb = s390_ipl_get_iplb(); + } +- if (iplb) { ++ if (!iplb) { ++ env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; ++ return; ++ } ++ ++ if (!s390_is_pv()) { + cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); +- env->regs[r1 + 1] = DIAG_308_RC_OK; + } else { +- env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; ++ s390_cpu_pv_mem_write(cpu, 0, iplb, be32_to_cpu(iplb->len)); + } ++ env->regs[r1 + 1] = DIAG_308_RC_OK; + return; + case DIAG308_PV_START: + iplb = s390_ipl_get_iplb_pv(); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch b/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch new file mode 100644 index 0000000..10f1930 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch @@ -0,0 +1,172 @@ +From 5a8b40c3fdafeb49072f8643210bea00ce1478c4 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:10 -0400 +Subject: [PATCH 28/42] s390x: protvirt: SCLP interpretation + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-29-thuth@redhat.com> +Patchwork-id: 97053 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 28/38] s390x: protvirt: SCLP interpretation +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +SCLP for a protected guest is done over the SIDAD, so we need to use +the s390_cpu_pv_mem_* functions to access the SIDAD instead of guest +memory when reading/writing SCBs. + +To not confuse the sclp emulation, we set 0x4000 as the SCCB address, +since the function that injects the sclp external interrupt would +reject a zero sccb address. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Reviewed-by: Christian Borntraeger +Message-Id: <20200319131921.2367-10-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 0f73c5b30b8ba6c0828608be496d2f59a5427539) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/sclp.c | 56 +++++++++++++++++++++++++++++++++-------- + include/hw/s390x/sclp.h | 2 ++ + target/s390x/kvm.c | 25 ++++++++++++++---- + 3 files changed, 67 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index f57ce7b739..1c380a49cc 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -33,6 +33,22 @@ static inline SCLPDevice *get_sclp_device(void) + return sclp; + } + ++static inline bool sclp_command_code_valid(uint32_t code) ++{ ++ switch (code & SCLP_CMD_CODE_MASK) { ++ case SCLP_CMDW_READ_SCP_INFO: ++ case SCLP_CMDW_READ_SCP_INFO_FORCED: ++ case SCLP_CMDW_READ_CPU_INFO: ++ case SCLP_CMDW_CONFIGURE_IOA: ++ case SCLP_CMDW_DECONFIGURE_IOA: ++ case SCLP_CMD_READ_EVENT_DATA: ++ case SCLP_CMD_WRITE_EVENT_DATA: ++ case SCLP_CMD_WRITE_EVENT_MASK: ++ return true; ++ } ++ return false; ++} ++ + static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int *count) + { + MachineState *ms = MACHINE(qdev_get_machine()); +@@ -193,6 +209,34 @@ static void sclp_execute(SCLPDevice *sclp, SCCB *sccb, uint32_t code) + } + } + ++/* ++ * We only need the address to have something valid for the ++ * service_interrupt call. 
++ */ ++#define SCLP_PV_DUMMY_ADDR 0x4000 ++int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, ++ uint32_t code) ++{ ++ SCLPDevice *sclp = get_sclp_device(); ++ SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); ++ SCCB work_sccb; ++ hwaddr sccb_len = sizeof(SCCB); ++ ++ s390_cpu_pv_mem_read(env_archcpu(env), 0, &work_sccb, sccb_len); ++ ++ if (!sclp_command_code_valid(code)) { ++ work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); ++ goto out_write; ++ } ++ ++ sclp_c->execute(sclp, &work_sccb, code); ++out_write: ++ s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, ++ be16_to_cpu(work_sccb.h.length)); ++ sclp_c->service_interrupt(sclp, SCLP_PV_DUMMY_ADDR); ++ return 0; ++} ++ + int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + { + SCLPDevice *sclp = get_sclp_device(); +@@ -230,17 +274,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + goto out; + } + +- switch (code & SCLP_CMD_CODE_MASK) { +- case SCLP_CMDW_READ_SCP_INFO: +- case SCLP_CMDW_READ_SCP_INFO_FORCED: +- case SCLP_CMDW_READ_CPU_INFO: +- case SCLP_CMDW_CONFIGURE_IOA: +- case SCLP_CMDW_DECONFIGURE_IOA: +- case SCLP_CMD_READ_EVENT_DATA: +- case SCLP_CMD_WRITE_EVENT_DATA: +- case SCLP_CMD_WRITE_EVENT_MASK: +- break; +- default: ++ if (!sclp_command_code_valid(code)) { + work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); + goto out_write; + } +diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h +index c54413b78c..c0a3faa37d 100644 +--- a/include/hw/s390x/sclp.h ++++ b/include/hw/s390x/sclp.h +@@ -217,5 +217,7 @@ void s390_sclp_init(void); + void sclp_service_interrupt(uint32_t sccb); + void raise_irq_cpu_hotplug(void); + int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); ++int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, ++ uint32_t code); + + #endif +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 6809a5ac40..56fe60c49c 100644 +--- 
a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -1230,12 +1230,27 @@ static void kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, + sccb = env->regs[ipbh0 & 0xf]; + code = env->regs[(ipbh0 & 0xf0) >> 4]; + +- r = sclp_service_call(env, sccb, code); +- if (r < 0) { +- kvm_s390_program_interrupt(cpu, -r); +- return; ++ switch (run->s390_sieic.icptcode) { ++ case ICPT_PV_INSTR_NOTIFICATION: ++ g_assert(s390_is_pv()); ++ /* The notification intercepts are currently handled by KVM */ ++ error_report("unexpected SCLP PV notification"); ++ exit(1); ++ break; ++ case ICPT_PV_INSTR: ++ g_assert(s390_is_pv()); ++ sclp_service_call_protected(env, sccb, code); ++ /* Setting the CC is done by the Ultravisor. */ ++ break; ++ case ICPT_INSTRUCTION: ++ g_assert(!s390_is_pv()); ++ r = sclp_service_call(env, sccb, code); ++ if (r < 0) { ++ kvm_s390_program_interrupt(cpu, -r); ++ return; ++ } ++ setcc(cpu, r); + } +- setcc(cpu, r); + } + + static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch b/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch new file mode 100644 index 0000000..ef246c7 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch @@ -0,0 +1,75 @@ +From d738b4336c79be68b6040f73427e089f46957728 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:11 -0400 +Subject: [PATCH 29/42] s390x: protvirt: Set guest IPL PSW + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-30-thuth@redhat.com> +Patchwork-id: 97049 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 29/38] s390x: protvirt: Set guest IPL PSW +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Handling of CPU reset and setting of the IPL psw from guest storage at +offset 0 is done by an Ultravisor call. Let's only fetch it if +necessary.
+ +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Reviewed-by: David Hildenbrand +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200319131921.2367-11-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 59181010a2ff82c3a97e9b5768ee87c38e4815f1) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.c | 26 +++++++++++++++++--------- + 1 file changed, 17 insertions(+), 9 deletions(-) + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 8f38cd8e6f..371b91b2d7 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -77,16 +77,24 @@ static bool s390_cpu_has_work(CPUState *cs) + static void s390_cpu_load_normal(CPUState *s) + { + S390CPU *cpu = S390_CPU(s); +- uint64_t spsw = ldq_phys(s->as, 0); +- +- cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; +- /* +- * Invert short psw indication, so SIE will report a specification +- * exception if it was not set. +- */ +- cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; +- cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; ++ uint64_t spsw; + ++ if (!s390_is_pv()) { ++ spsw = ldq_phys(s->as, 0); ++ cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; ++ /* ++ * Invert short psw indication, so SIE will report a specification ++ * exception if it was not set. ++ */ ++ cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; ++ cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; ++ } else { ++ /* ++ * Firmware requires us to set the load state before we set ++ * the cpu to operating on protected guests. 
++ */ ++ s390_cpu_set_state(S390_CPU_STATE_LOAD, cpu); ++ } + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); + } + #endif +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch b/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch new file mode 100644 index 0000000..204de2a --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch @@ -0,0 +1,886 @@ +From e6474080e3816e82e87c545a3d22db77c55ab053 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:04 -0400 +Subject: [PATCH 22/42] s390x: protvirt: Support unpack facility + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-23-thuth@redhat.com> +Patchwork-id: 97045 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 22/38] s390x: protvirt: Support unpack facility +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +The unpack facility provides the means to set up a protected guest. A +protected guest cannot be introspected by the hypervisor or any +user/administrator of the machine it is running on. + +Protected guests are encrypted at rest and need a special boot +mechanism via diag308 subcodes 8 and 10. + +Code 8 sets the PV-specific IPLB which is retained separately from +those set via code 5. + +Code 10 is used to unpack the VM into protected memory, verify its +integrity and start it. + +Signed-off-by: Janosch Frank +Co-developed-by: Christian Borntraeger [Changes +to machine] +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Reviewed-by: Cornelia Huck +Message-Id: <20200323083606.24520-1-frankja@linux.ibm.com> +[CH: fixed up KVM_PV_VM_ -> KVM_PV_] +Signed-off-by: Cornelia Huck +(cherry picked from commit c3347ed0d2ee42a7dcf7bfe7f9c3884a9596727a) +Signed-off-by: Danilo C. L.
de Paula +--- + MAINTAINERS | 2 + + hw/s390x/Makefile.objs | 1 + + hw/s390x/ipl.c | 59 +++++++++++++- + hw/s390x/ipl.h | 91 ++++++++++++++++++++- + hw/s390x/pv.c | 98 +++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 119 +++++++++++++++++++++++++++- + include/hw/s390x/pv.h | 55 +++++++++++++ + include/hw/s390x/s390-virtio-ccw.h | 1 + + target/s390x/cpu.c | 1 + + target/s390x/cpu_features_def.inc.h | 1 + + target/s390x/diag.c | 39 ++++++++- + target/s390x/kvm-stub.c | 5 ++ + target/s390x/kvm.c | 5 ++ + target/s390x/kvm_s390x.h | 1 + + 14 files changed, 468 insertions(+), 10 deletions(-) + create mode 100644 hw/s390x/pv.c + create mode 100644 include/hw/s390x/pv.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 49d5d44edc..2742c95575 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -385,6 +385,8 @@ F: target/s390x/machine.c + F: target/s390x/sigp.c + F: target/s390x/cpu_features*.[ch] + F: target/s390x/cpu_models.[ch] ++F: hw/s390x/pv.c ++F: include/hw/s390x/pv.h + F: hw/intc/s390_flic.c + F: hw/intc/s390_flic_kvm.c + F: include/hw/s390x/s390_flic.h +diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs +index e02ed80b68..a46a1c7894 100644 +--- a/hw/s390x/Makefile.objs ++++ b/hw/s390x/Makefile.objs +@@ -31,6 +31,7 @@ obj-y += tod-qemu.o + obj-$(CONFIG_KVM) += tod-kvm.o + obj-$(CONFIG_KVM) += s390-skeys-kvm.o + obj-$(CONFIG_KVM) += s390-stattrib-kvm.o ++obj-$(CONFIG_KVM) += pv.o + obj-y += s390-ccw.o + obj-y += ap-device.o + obj-y += ap-bridge.o +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index fa0409dc23..586d95b5b6 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -1,10 +1,11 @@ + /* + * bootloader support + * +- * Copyright IBM, Corp. 2012 ++ * Copyright IBM, Corp. 2012, 2020 + * + * Authors: + * Christian Borntraeger ++ * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. 
+@@ -27,6 +28,7 @@ + #include "hw/s390x/vfio-ccw.h" + #include "hw/s390x/css.h" + #include "hw/s390x/ebcdic.h" ++#include "hw/s390x/pv.h" + #include "ipl.h" + #include "qemu/error-report.h" + #include "qemu/config-file.h" +@@ -557,12 +559,31 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb) + { + S390IPLState *ipl = get_ipl_device(); + +- ipl->iplb = *iplb; +- ipl->iplb_valid = true; ++ /* ++ * The IPLB set and retrieved by subcodes 8/9 is completely ++ * separate from the one managed via subcodes 5/6. ++ */ ++ if (iplb->pbt == S390_IPL_TYPE_PV) { ++ ipl->iplb_pv = *iplb; ++ ipl->iplb_valid_pv = true; ++ } else { ++ ipl->iplb = *iplb; ++ ipl->iplb_valid = true; ++ } + ipl->netboot = is_virtio_net_device(iplb); + update_machine_ipl_properties(iplb); + } + ++IplParameterBlock *s390_ipl_get_iplb_pv(void) ++{ ++ S390IPLState *ipl = get_ipl_device(); ++ ++ if (!ipl->iplb_valid_pv) { ++ return NULL; ++ } ++ return &ipl->iplb_pv; ++} ++ + IplParameterBlock *s390_ipl_get_iplb(void) + { + S390IPLState *ipl = get_ipl_device(); +@@ -651,6 +672,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) + cpu_physical_memory_unmap(addr, len, 1, len); + } + ++int s390_ipl_prepare_pv_header(void) ++{ ++ IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); ++ IPLBlockPV *ipib_pv = &ipib->pv; ++ void *hdr = g_malloc(ipib_pv->pv_header_len); ++ int rc; ++ ++ cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, ++ ipib_pv->pv_header_len); ++ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ++ ipib_pv->pv_header_len); ++ g_free(hdr); ++ return rc; ++} ++ ++int s390_ipl_pv_unpack(void) ++{ ++ IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); ++ IPLBlockPV *ipib_pv = &ipib->pv; ++ int i, rc = 0; ++ ++ for (i = 0; i < ipib_pv->num_comp; i++) { ++ rc = s390_pv_unpack(ipib_pv->components[i].addr, ++ TARGET_PAGE_ALIGN(ipib_pv->components[i].size), ++ ipib_pv->components[i].tweak_pref); ++ if (rc) { ++ break; ++ } ++ } ++ return rc; ++} ++ + void s390_ipl_prepare_cpu(S390CPU *cpu) + { + 
S390IPLState *ipl = get_ipl_device(); +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index a5665e6bfd..89b3044d7a 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -1,8 +1,9 @@ + /* + * s390 IPL device + * +- * Copyright 2015 IBM Corp. ++ * Copyright 2015, 2020 IBM Corp. + * Author(s): Zhang Fan ++ * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level +@@ -15,6 +16,24 @@ + #include "cpu.h" + #include "hw/qdev-core.h" + ++struct IPLBlockPVComp { ++ uint64_t tweak_pref; ++ uint64_t addr; ++ uint64_t size; ++} QEMU_PACKED; ++typedef struct IPLBlockPVComp IPLBlockPVComp; ++ ++struct IPLBlockPV { ++ uint8_t reserved18[87]; /* 0x18 */ ++ uint8_t version; /* 0x6f */ ++ uint32_t reserved70; /* 0x70 */ ++ uint32_t num_comp; /* 0x74 */ ++ uint64_t pv_header_addr; /* 0x78 */ ++ uint64_t pv_header_len; /* 0x80 */ ++ struct IPLBlockPVComp components[]; ++} QEMU_PACKED; ++typedef struct IPLBlockPV IPLBlockPV; ++ + struct IplBlockCcw { + uint8_t reserved0[85]; + uint8_t ssid; +@@ -71,6 +90,7 @@ union IplParameterBlock { + union { + IplBlockCcw ccw; + IplBlockFcp fcp; ++ IPLBlockPV pv; + IplBlockQemuScsi scsi; + }; + } QEMU_PACKED; +@@ -85,8 +105,11 @@ typedef union IplParameterBlock IplParameterBlock; + + int s390_ipl_set_loadparm(uint8_t *loadparm); + void s390_ipl_update_diag308(IplParameterBlock *iplb); ++int s390_ipl_prepare_pv_header(void); ++int s390_ipl_pv_unpack(void); + void s390_ipl_prepare_cpu(S390CPU *cpu); + IplParameterBlock *s390_ipl_get_iplb(void); ++IplParameterBlock *s390_ipl_get_iplb_pv(void); + + enum s390_reset { + /* default is a reset not triggered by a CPU e.g. 
issued by QMP */ +@@ -94,6 +117,7 @@ enum s390_reset { + S390_RESET_REIPL, + S390_RESET_MODIFIED_CLEAR, + S390_RESET_LOAD_NORMAL, ++ S390_RESET_PV, + }; + void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type); + void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type); +@@ -133,6 +157,7 @@ struct S390IPLState { + /*< private >*/ + DeviceState parent_obj; + IplParameterBlock iplb; ++ IplParameterBlock iplb_pv; + QemuIplParameters qipl; + uint64_t start_addr; + uint64_t compat_start_addr; +@@ -140,6 +165,7 @@ struct S390IPLState { + uint64_t compat_bios_start_addr; + bool enforce_bios; + bool iplb_valid; ++ bool iplb_valid_pv; + bool netboot; + /* reset related properties don't have to be migrated or reset */ + enum s390_reset reset_type; +@@ -162,6 +188,8 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); + #define DIAG_308_RC_OK 0x0001 + #define DIAG_308_RC_NO_CONF 0x0102 + #define DIAG_308_RC_INVALID 0x0402 ++#define DIAG_308_RC_NO_PV_CONF 0x0902 ++#define DIAG_308_RC_INVAL_FOR_PV 0x0a02 + + #define DIAG308_RESET_MOD_CLR 0 + #define DIAG308_RESET_LOAD_NORM 1 +@@ -169,12 +197,17 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); + #define DIAG308_LOAD_NORMAL_DUMP 4 + #define DIAG308_SET 5 + #define DIAG308_STORE 6 ++#define DIAG308_PV_SET 8 ++#define DIAG308_PV_STORE 9 ++#define DIAG308_PV_START 10 + + #define S390_IPL_TYPE_FCP 0x00 + #define S390_IPL_TYPE_CCW 0x02 ++#define S390_IPL_TYPE_PV 0x05 + #define S390_IPL_TYPE_QEMU_SCSI 0xff + + #define S390_IPLB_HEADER_LEN 8 ++#define S390_IPLB_MIN_PV_LEN 148 + #define S390_IPLB_MIN_CCW_LEN 200 + #define S390_IPLB_MIN_FCP_LEN 384 + #define S390_IPLB_MIN_QEMU_SCSI_LEN 200 +@@ -184,6 +217,62 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb) + return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); + } + ++static inline bool ipl_valid_pv_components(IplParameterBlock *iplb) ++{ ++ IPLBlockPV *ipib_pv = 
&iplb->pv; ++ int i; ++ ++ if (ipib_pv->num_comp == 0) { ++ return false; ++ } ++ ++ for (i = 0; i < ipib_pv->num_comp; i++) { ++ /* Addr must be 4k aligned */ ++ if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { ++ return false; ++ } ++ ++ /* Tweak prefix is monotonically increasing with each component */ ++ if (i < ipib_pv->num_comp - 1 && ++ ipib_pv->components[i].tweak_pref >= ++ ipib_pv->components[i + 1].tweak_pref) { ++ return false; ++ } ++ } ++ return true; ++} ++ ++static inline bool ipl_valid_pv_header(IplParameterBlock *iplb) ++{ ++ IPLBlockPV *ipib_pv = &iplb->pv; ++ ++ if (ipib_pv->pv_header_len > 2 * TARGET_PAGE_SIZE) { ++ return false; ++ } ++ ++ if (!address_space_access_valid(&address_space_memory, ++ ipib_pv->pv_header_addr, ++ ipib_pv->pv_header_len, ++ false, ++ MEMTXATTRS_UNSPECIFIED)) { ++ return false; ++ } ++ ++ return true; ++} ++ ++static inline bool iplb_valid_pv(IplParameterBlock *iplb) ++{ ++ if (iplb->pbt != S390_IPL_TYPE_PV || ++ be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN) { ++ return false; ++ } ++ if (!ipl_valid_pv_header(iplb)) { ++ return false; ++ } ++ return ipl_valid_pv_components(iplb); ++} ++ + static inline bool iplb_valid(IplParameterBlock *iplb) + { + switch (iplb->pbt) { +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +new file mode 100644 +index 0000000000..a40a844806 +--- /dev/null ++++ b/hw/s390x/pv.c +@@ -0,0 +1,98 @@ ++/* ++ * Protected Virtualization functions ++ * ++ * Copyright IBM Corp. 2020 ++ * Author(s): ++ * Janosch Frank ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++#include "qemu/osdep.h" ++ ++#include <linux/kvm.h> ++ ++#include "qemu/error-report.h" ++#include "sysemu/kvm.h" ++#include "hw/s390x/pv.h" ++ ++static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) ++{ ++ struct kvm_pv_cmd pv_cmd = { ++ .cmd = cmd, ++ .data = (uint64_t)data, ++ }; ++ int rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); ++ ++ if (rc) { ++ error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " ++ "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, ++ rc); ++ } ++ return rc; ++} ++ ++/* ++ * This macro lets us pass the command as a string to the function so ++ * we can print it on an error. ++ */ ++#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data); ++#define s390_pv_cmd_exit(cmd, data) \ ++{ \ ++ int rc; \ ++ \ ++ rc = __s390_pv_cmd(cmd, #cmd, data);\ ++ if (rc) { \ ++ exit(1); \ ++ } \ ++} ++ ++int s390_pv_vm_enable(void) ++{ ++ return s390_pv_cmd(KVM_PV_ENABLE, NULL); ++} ++ ++void s390_pv_vm_disable(void) ++{ ++ s390_pv_cmd_exit(KVM_PV_DISABLE, NULL); ++} ++ ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) ++{ ++ struct kvm_s390_pv_sec_parm args = { ++ .origin = origin, ++ .length = length, ++ }; ++ ++ return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); ++} ++ ++/* ++ * Called for each component in the SE type IPL parameter block 0.
++ */ ++int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) ++{ ++ struct kvm_s390_pv_unp args = { ++ .addr = addr, ++ .size = size, ++ .tweak = tweak, ++ }; ++ ++ return s390_pv_cmd(KVM_PV_UNPACK, &args); ++} ++ ++void s390_pv_perf_clear_reset(void) ++{ ++ s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); ++} ++ ++int s390_pv_verify(void) ++{ ++ return s390_pv_cmd(KVM_PV_VERIFY, NULL); ++} ++ ++void s390_pv_unshare(void) ++{ ++ s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); ++} +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 4ea01c53c0..82da1d9ab5 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1,9 +1,10 @@ + /* + * virtio ccw machine + * +- * Copyright 2012 IBM Corp. ++ * Copyright 2012, 2020 IBM Corp. + * Copyright (c) 2009 Alexander Graf + * Author(s): Cornelia Huck ++ * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level +@@ -41,6 +42,8 @@ + #include "hw/qdev-properties.h" + #include "hw/s390x/tod.h" + #include "sysemu/sysemu.h" ++#include "hw/s390x/pv.h" ++#include + + S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) + { +@@ -318,10 +321,78 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); + } + ++static void s390_machine_unprotect(S390CcwMachineState *ms) ++{ ++ s390_pv_vm_disable(); ++ ms->pv = false; ++} ++ ++static int s390_machine_protect(S390CcwMachineState *ms) ++{ ++ int rc; ++ ++ /* Create SE VM */ ++ rc = s390_pv_vm_enable(); ++ if (rc) { ++ return rc; ++ } ++ ++ ms->pv = true; ++ ++ /* Set SE header and unpack */ ++ rc = s390_ipl_prepare_pv_header(); ++ if (rc) { ++ goto out_err; ++ } ++ ++ /* Decrypt image */ ++ rc = s390_ipl_pv_unpack(); ++ if (rc) { ++ goto out_err; ++ } ++ ++ /* Verify integrity */ ++ rc = s390_pv_verify(); ++ if (rc) { ++ goto out_err; ++ } ++ return rc; ++ ++out_err: ++ 
s390_machine_unprotect(ms); ++ return rc; ++} ++ ++static void s390_machine_inject_pv_error(CPUState *cs) ++{ ++ int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; ++ CPUS390XState *env = &S390_CPU(cs)->env; ++ ++ /* Report that we are unable to enter protected mode */ ++ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; ++} ++ ++static void s390_pv_prepare_reset(S390CcwMachineState *ms) ++{ ++ CPUState *cs; ++ ++ if (!s390_is_pv()) { ++ return; ++ } ++ /* Unsharing requires all cpus to be stopped */ ++ CPU_FOREACH(cs) { ++ s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); ++ } ++ s390_pv_unshare(); ++ s390_pv_perf_clear_reset(); ++} ++ + static void s390_machine_reset(MachineState *machine) + { ++ S390CcwMachineState *ms = S390_CCW_MACHINE(machine); + enum s390_reset reset_type; + CPUState *cs, *t; ++ S390CPU *cpu; + + /* get the reset parameters, reset them once done */ + s390_ipl_get_reset_request(&cs, &reset_type); +@@ -329,9 +400,15 @@ static void s390_machine_reset(MachineState *machine) + /* all CPUs are paused and synchronized at this point */ + s390_cmma_reset(); + ++ cpu = S390_CPU(cs); ++ + switch (reset_type) { + case S390_RESET_EXTERNAL: + case S390_RESET_REIPL: ++ if (s390_is_pv()) { ++ s390_machine_unprotect(ms); ++ } ++ + qemu_devices_reset(); + s390_crypto_reset(); + +@@ -339,22 +416,56 @@ static void s390_machine_reset(MachineState *machine) + run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); + break; + case S390_RESET_MODIFIED_CLEAR: ++ /* ++ * Susbsystem reset needs to be done before we unshare memory ++ * and lose access to VIRTIO structures in guest memory. 
++ */ ++ subsystem_reset(); ++ s390_crypto_reset(); ++ s390_pv_prepare_reset(ms); + CPU_FOREACH(t) { + run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); + } +- subsystem_reset(); +- s390_crypto_reset(); + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); + break; + case S390_RESET_LOAD_NORMAL: ++ /* ++ * Susbsystem reset needs to be done before we unshare memory ++ * and lose access to VIRTIO structures in guest memory. ++ */ ++ subsystem_reset(); ++ s390_pv_prepare_reset(ms); + CPU_FOREACH(t) { + if (t == cs) { + continue; + } + run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); + } +- subsystem_reset(); + run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); ++ run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); ++ break; ++ case S390_RESET_PV: /* Subcode 10 */ ++ subsystem_reset(); ++ s390_crypto_reset(); ++ ++ CPU_FOREACH(t) { ++ if (t == cs) { ++ continue; ++ } ++ run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); ++ } ++ run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); ++ ++ if (s390_machine_protect(ms)) { ++ s390_machine_inject_pv_error(cs); ++ /* ++ * Continue after the diag308 so the guest knows something ++ * went wrong. ++ */ ++ s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); ++ return; ++ } ++ + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); + break; + default: +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +new file mode 100644 +index 0000000000..c6cb360f2f +--- /dev/null ++++ b/include/hw/s390x/pv.h +@@ -0,0 +1,55 @@ ++/* ++ * Protected Virtualization header ++ * ++ * Copyright IBM Corp. 2020 ++ * Author(s): ++ * Janosch Frank ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++#ifndef HW_S390_PV_H ++#define HW_S390_PV_H ++ ++#ifdef CONFIG_KVM ++#include "hw/s390x/s390-virtio-ccw.h" ++ ++static inline bool s390_is_pv(void) ++{ ++ static S390CcwMachineState *ccw; ++ Object *obj; ++ ++ if (ccw) { ++ return ccw->pv; ++ } ++ ++ /* we have to bail out for the "none" machine */ ++ obj = object_dynamic_cast(qdev_get_machine(), ++ TYPE_S390_CCW_MACHINE); ++ if (!obj) { ++ return false; ++ } ++ ccw = S390_CCW_MACHINE(obj); ++ return ccw->pv; ++} ++ ++int s390_pv_vm_enable(void); ++void s390_pv_vm_disable(void); ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); ++int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); ++void s390_pv_perf_clear_reset(void); ++int s390_pv_verify(void); ++void s390_pv_unshare(void); ++#else /* CONFIG_KVM */ ++static inline bool s390_is_pv(void) { return false; } ++static inline int s390_pv_vm_enable(void) { return 0; } ++static inline void s390_pv_vm_disable(void) {} ++static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } ++static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } ++static inline void s390_pv_perf_clear_reset(void) {} ++static inline int s390_pv_verify(void) { return 0; } ++static inline void s390_pv_unshare(void) {} ++#endif /* CONFIG_KVM */ ++ ++#endif /* HW_S390_PV_H */ +diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h +index 8aa27199c9..cd1dccc6e3 100644 +--- a/include/hw/s390x/s390-virtio-ccw.h ++++ b/include/hw/s390x/s390-virtio-ccw.h +@@ -28,6 +28,7 @@ typedef struct S390CcwMachineState { + /*< public >*/ + bool aes_key_wrap; + bool dea_key_wrap; ++ bool pv; + uint8_t loadparm[8]; + } S390CcwMachineState; + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index c0dd502b84..8f38cd8e6f 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -37,6 +37,7 @@ + #include "sysemu/hw_accel.h" + #include "hw/qdev-properties.h" + #ifndef CONFIG_USER_ONLY 
++#include "hw/s390x/pv.h" + #include "hw/boards.h" + #include "sysemu/arch_init.h" + #include "sysemu/sysemu.h" +diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h +index 31dff0d84e..60db28351d 100644 +--- a/target/s390x/cpu_features_def.inc.h ++++ b/target/s390x/cpu_features_def.inc.h +@@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( + DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") + DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") + DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") ++DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") + + /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ + DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") +diff --git a/target/s390x/diag.c b/target/s390x/diag.c +index 8aba6341f9..b2cbefb8cf 100644 +--- a/target/s390x/diag.c ++++ b/target/s390x/diag.c +@@ -20,6 +20,8 @@ + #include "sysemu/cpus.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/s390-virtio-ccw.h" ++#include "hw/s390x/pv.h" ++#include "kvm_s390x.h" + + int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) + { +@@ -52,6 +54,10 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) + static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, + uintptr_t ra, bool write) + { ++ /* Handled by the Ultravisor */ ++ if (s390_is_pv()) { ++ return 0; ++ } + if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return -1; +@@ -67,6 +73,7 @@ static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, + + void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + { ++ bool valid; + CPUState *cs = env_cpu(env); + uint64_t addr = env->regs[r1]; + 
uint64_t subcode = env->regs[r3]; +@@ -82,6 +89,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + return; + } + ++ if (subcode >= DIAG308_PV_SET && !s390_has_feat(S390_FEAT_UNPACK)) { ++ s390_program_interrupt(env, PGM_SPECIFICATION, ra); ++ return; ++ } ++ + switch (subcode) { + case DIAG308_RESET_MOD_CLR: + s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); +@@ -94,6 +106,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + s390_ipl_reset_request(cs, S390_RESET_REIPL); + break; + case DIAG308_SET: ++ case DIAG308_PV_SET: + if (diag308_parm_check(env, r1, addr, ra, false)) { + return; + } +@@ -106,7 +119,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) + + cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); + +- if (!iplb_valid(iplb)) { ++ valid = subcode == DIAG308_PV_SET ? iplb_valid_pv(iplb) : iplb_valid(iplb); ++ if (!valid) { + env->regs[r1 + 1] = DIAG_308_RC_INVALID; + goto out; + } +@@ -117,10 +131,15 @@ out: + g_free(iplb); + return; + case DIAG308_STORE: ++ case DIAG308_PV_STORE: + if (diag308_parm_check(env, r1, addr, ra, true)) { + return; + } +- iplb = s390_ipl_get_iplb(); ++ if (subcode == DIAG308_PV_STORE) { ++ iplb = s390_ipl_get_iplb_pv(); ++ } else { ++ iplb = s390_ipl_get_iplb(); ++ } + if (iplb) { + cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); + env->regs[r1 + 1] = DIAG_308_RC_OK; +@@ -128,6 +147,22 @@ out: + env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; + } + return; ++ case DIAG308_PV_START: ++ iplb = s390_ipl_get_iplb_pv(); ++ if (!iplb) { ++ env->regs[r1 + 1] = DIAG_308_RC_NO_PV_CONF; ++ return; ++ } ++ ++ if (kvm_s390_get_hpage_1m()) { ++ error_report("Protected VMs can currently not be backed with " ++ "huge pages"); ++ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; ++ return; ++ } ++ ++ s390_ipl_reset_request(cs, S390_RESET_PV); ++ break; + default: + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + 
break; +diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c +index c4cd497f85..aa185017a2 100644 +--- a/target/s390x/kvm-stub.c ++++ b/target/s390x/kvm-stub.c +@@ -39,6 +39,11 @@ int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu) + return 0; + } + ++int kvm_s390_get_hpage_1m(void) ++{ ++ return 0; ++} ++ + int kvm_s390_get_ri(void) + { + return 0; +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 75d82af6fc..9a0be13959 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -321,6 +321,11 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) + cap_hpage_1m = 1; + } + ++int kvm_s390_get_hpage_1m(void) ++{ ++ return cap_hpage_1m; ++} ++ + static void ccw_machine_class_foreach(ObjectClass *oc, void *opaque) + { + MachineClass *mc = MACHINE_CLASS(oc); +diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h +index 0b21789796..dea813f450 100644 +--- a/target/s390x/kvm_s390x.h ++++ b/target/s390x/kvm_s390x.h +@@ -23,6 +23,7 @@ void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code); + int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); + void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); ++int kvm_s390_get_hpage_1m(void); + int kvm_s390_get_ri(void); + int kvm_s390_get_gs(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch b/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch new file mode 100644 index 0000000..b12b458 --- /dev/null +++ b/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch @@ -0,0 +1,61 @@ +From 8b994757136780998e0dd1d41613d2006c0dbcf6 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 4 Aug 2020 10:16:04 -0400 +Subject: [PATCH 4/4] s390x/protvirt: allow to IPL secure guests with + -no-reboot + +RH-Author: Thomas Huth +Message-id: <20200804101604.6259-2-thuth@redhat.com> 
+Patchwork-id: 98126 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] s390x/protvirt: allow to IPL secure guests with -no-reboot +Bugzilla: 1863034 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Christian Borntraeger + +Right now, -no-reboot prevents secure guests from running. This is +correct from an implementation point of view, as we have modeled the +transition from non-secure to secure as a program directed IPL. From +a user perspective, this is not the behavior of least surprise. + +We should implement the IPL into protected mode similar to the +functions that we use for kdump/kexec. In other words, we do not stop +here when -no-reboot is specified on the command line. Like function 0 +or function 1, function 10 is not a classic reboot. For example, it +can only be called once. Before calling it a second time, a real +reboot/reset must happen in-between. So function code 10 is more or +less a state transition reset, but not a "standard" reset or reboot. + +Fixes: 4d226deafc44 ("s390x: protvirt: Support unpack facility") +Signed-off-by: Christian Borntraeger +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Acked-by: Viktor Mihajlovski +Message-Id: <20200721103202.30610-1-borntraeger@de.ibm.com> +[CH: tweaked description] +Signed-off-by: Cornelia Huck +(cherry picked from commit d1bb69db4ceb6897ef6a17bf263146b53a123632) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/ipl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 586d95b5b6..5b3ea990af 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -624,7 +624,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type) + } + } + if (reset_type == S390_RESET_MODIFIED_CLEAR || +- reset_type == S390_RESET_LOAD_NORMAL) { ++ reset_type == S390_RESET_LOAD_NORMAL || ++ reset_type == S390_RESET_PV) { + /* ignore -no-reboot, send no event */ + qemu_system_reset_request(SHUTDOWN_CAUSE_SUBSYSTEM_RESET); + } else { +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch b/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch new file mode 100644 index 0000000..764ceb1 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch @@ -0,0 +1,92 @@ +From f3594f3d84a7442c194b1b9fd288e7414540ec0f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:20 -0400 +Subject: [PATCH 38/42] s390x: pv: Fix KVM_PV_PREP_RESET command wrapper name +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-39-thuth@redhat.com> +Patchwork-id: 97051 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 38/38] s390x: pv: Fix KVM_PV_PREP_RESET command wrapper name +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Upstream: Merged in https://github.com/cohuck/qemu/tree/s390-next + +s390_pv_perf_clear_reset() is not a very helpful name since that +function needs to be called for a normal and a clear reset via +diag308. + +Let's instead name it s390_pv_prep_reset() which reflects the purpose +of the function a bit better. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Message-Id: <20200505124159.24099-1-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit f9628f3f6db341751002dac3be18610fa77c01ad) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/pv.c | 2 +- + hw/s390x/s390-virtio-ccw.c | 2 +- + include/hw/s390x/pv.h | 4 ++-- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index f11868e865..ab3a2482aa 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -88,7 +88,7 @@ int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) + return s390_pv_cmd(KVM_PV_UNPACK, &args); + } + +-void s390_pv_perf_clear_reset(void) ++void s390_pv_prep_reset(void) + { + s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); + } +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 07773a12b2..e6ed13b649 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -402,7 +402,7 @@ static void s390_pv_prepare_reset(S390CcwMachineState *ms) + s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); + } + s390_pv_unshare(); +- s390_pv_perf_clear_reset(); ++ s390_pv_prep_reset(); + } + + static void s390_machine_reset(MachineState *machine) +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 522ca6a04e..aee758bc2d 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -39,7 +39,7 @@ int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); +-void s390_pv_perf_clear_reset(void); ++void s390_pv_prep_reset(void); + int s390_pv_verify(void); + void s390_pv_unshare(void); + void s390_pv_inject_reset_error(CPUState *cs); +@@ -49,7 +49,7 @@ static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { 
return 0; } + static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } +-static inline void s390_pv_perf_clear_reset(void) {} ++static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } + static inline void s390_pv_unshare(void) {} + static inline void s390_pv_inject_reset_error(CPUState *cs) {}; +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch b/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch new file mode 100644 index 0000000..65208c7 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch @@ -0,0 +1,57 @@ +From 1678288d945906d83d7adae109b842080aebaf19 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:18 -0400 +Subject: [PATCH 36/42] s390x/pv: Retry ioctls on -EINTR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-37-thuth@redhat.com> +Patchwork-id: 97055 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 36/38] s390x/pv: Retry ioctls on -EINTR +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Christian Borntraeger + +PV_ENABLE (and maybe others) might return -EINTR when a signal is +pending. See the Linux kernel patch "s390/gmap: return proper error code +on ksm unsharing" for details. Let us retry the ioctl in that case. + +Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") +Reported-by: Marc Hartmayer +Acked-by: Janosch Frank +Tested-by: Marc Hartmayer +Signed-off-by: Christian Borntraeger +Message-Id: <20200327124616.34866-1-borntraeger@de.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit e8d12a55f6d3e577455b02f15907c460578c689b) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/pv.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index a40a844806..cb0dce4a4f 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -23,7 +23,11 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + .cmd = cmd, + .data = (uint64_t)data, + }; +- int rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); ++ int rc; ++ ++ do { ++ rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); ++ } while (rc == -EINTR); + + if (rc) { + error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch new file mode 100644 index 0000000..e78f4da --- /dev/null +++ b/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch @@ -0,0 +1,150 @@ +From 0db8d909a2f3c53d12b0ae12307965f9a8193dbc Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:19 -0400 +Subject: [PATCH 37/42] s390x/s390-virtio-ccw: Fix build on systems without KVM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-38-thuth@redhat.com> +Patchwork-id: 97047 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 37/38] s390x/s390-virtio-ccw: Fix build on systems without KVM +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Christian Borntraeger + +linux/kvm.h is not available on all platforms. Let us move +s390_machine_inject_pv_error into pv.c as it uses KVM structures. +Also rename the function to s390_pv_inject_reset_error. + +While at it, ipl.h needs an include for "exec/address-spaces.h" +as it uses address_space_memory. 
+ +Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") +Reported-by: Bruce Rogers +Signed-off-by: Christian Borntraeger +Message-Id: <20200406100158.5940-2-borntraeger@de.ibm.com> +Reviewed-by: David Hildenbrand +Signed-off-by: Cornelia Huck +(cherry picked from commit fbc1384ccd48fa7c0c38f950adf7992a4fb6042e) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/ipl.h | 1 + + hw/s390x/pv.c | 11 +++++++++++ + hw/s390x/s390-virtio-ccw.c | 12 +----------- + include/hw/s390x/pv.h | 3 +++ + 4 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index 89b3044d7a..53cc9eb5ac 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -14,6 +14,7 @@ + #define HW_S390_IPL_H + + #include "cpu.h" ++#include "exec/address-spaces.h" + #include "hw/qdev-core.h" + + struct IPLBlockPVComp { +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index cb0dce4a4f..f11868e865 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -13,8 +13,10 @@ + + #include + ++#include "cpu.h" + #include "qemu/error-report.h" + #include "sysemu/kvm.h" ++#include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" + + static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) +@@ -100,3 +102,12 @@ void s390_pv_unshare(void) + { + s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); + } ++ ++void s390_pv_inject_reset_error(CPUState *cs) ++{ ++ int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; ++ CPUS390XState *env = &S390_CPU(cs)->env; ++ ++ /* Report that we are unable to enter protected mode */ ++ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; ++} +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index c08e42bda1..07773a12b2 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -44,7 +44,6 @@ + #include "sysemu/sysemu.h" + #include "sysemu/balloon.h" + #include "hw/s390x/pv.h" +-#include + #include "migration/blocker.h" + + static Error *pv_mig_blocker; +@@ -391,15 +390,6 @@ out_err: + return rc; + } + +-static 
void s390_machine_inject_pv_error(CPUState *cs) +-{ +- int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; +- CPUS390XState *env = &S390_CPU(cs)->env; +- +- /* Report that we are unable to enter protected mode */ +- env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; +-} +- + static void s390_pv_prepare_reset(S390CcwMachineState *ms) + { + CPUState *cs; +@@ -485,7 +475,7 @@ static void s390_machine_reset(MachineState *machine) + run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); + + if (s390_machine_protect(ms)) { +- s390_machine_inject_pv_error(cs); ++ s390_pv_inject_reset_error(cs); + /* + * Continue after the diag308 so the guest knows something + * went wrong. +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index c6cb360f2f..522ca6a04e 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -13,6 +13,7 @@ + #define HW_S390_PV_H + + #ifdef CONFIG_KVM ++#include "cpu.h" + #include "hw/s390x/s390-virtio-ccw.h" + + static inline bool s390_is_pv(void) +@@ -41,6 +42,7 @@ int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_perf_clear_reset(void); + int s390_pv_verify(void); + void s390_pv_unshare(void); ++void s390_pv_inject_reset_error(CPUState *cs); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_vm_enable(void) { return 0; } +@@ -50,6 +52,7 @@ static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { + static inline void s390_pv_perf_clear_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } + static inline void s390_pv_unshare(void) {} ++static inline void s390_pv_inject_reset_error(CPUState *cs) {}; + #endif /* CONFIG_KVM */ + + #endif /* HW_S390_PV_H */ +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch b/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch new file mode 100644 index 0000000..7143964 --- /dev/null +++ b/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch 
@@ -0,0 +1,49 @@ +From a2befb24c10f58ce6c27d242f3b88afee1f77ec8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 7 Jul 2020 09:35:31 -0400 +Subject: [PATCH 2/4] s390x: sigp: Fix sense running reporting + +RH-Author: Thomas Huth +Message-id: <20200707093532.22456-2-thuth@redhat.com> +Patchwork-id: 97920 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/2] s390x: sigp: Fix sense running reporting +Bugzilla: 1854092 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +The logic was inverted and reported running if the cpu was stopped. +Let's fix that. + +Signed-off-by: Janosch Frank +Fixes: d1b468bc8869 ("s390x/tcg: implement SIGP SENSE RUNNING STATUS") +Reviewed-by: David Hildenbrand +Message-Id: <20200124134818.9981-1-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 4103500e2fa934a6995e4cedab37423e606715bf) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/sigp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c +index 727875bb4a..c604f17710 100644 +--- a/target/s390x/sigp.c ++++ b/target/s390x/sigp.c +@@ -348,9 +348,9 @@ static void sigp_sense_running(S390CPU *dst_cpu, SigpInfo *si) + + /* If halted (which includes also STOPPED), it is not running */ + if (CPU(dst_cpu)->halted) { +- si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; +- } else { + set_sigp_status(si, SIGP_STAT_NOT_RUNNING); ++ } else { ++ si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch b/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch new file mode 100644 index 0000000..b6ac314 --- /dev/null +++ b/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch @@ -0,0 +1,57 @@ +From 0c85e86077b42547034ec6e8330a3e61d79b97ee Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 7 Jul 2020 09:35:32 -0400 +Subject: [PATCH 3/4] s390x/tcg: 
clear local interrupts on reset normal + +RH-Author: Thomas Huth +Message-id: <20200707093532.22456-3-thuth@redhat.com> +Patchwork-id: 97919 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/2] s390x/tcg: clear local interrupts on reset normal +Bugzilla: 1854092 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Cornelia Huck + +We neglected to clean up pending interrupts and emergency signals; +fix that. + +Message-Id: <20191206135404.16051-1-cohuck@redhat.com> +Signed-off-by: Cornelia Huck +Reviewed-by: David Hildenbrand +(cherry picked from commit bcf88d56efec4ffc153bbe98d11b689a5ebe1a91) +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/cpu.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index edf8391504..a48e655c4d 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -98,10 +98,6 @@ struct CPUS390XState { + + uint64_t cregs[16]; /* control registers */ + +- int pending_int; +- uint16_t external_call_addr; +- DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); +- + uint64_t ckc; + uint64_t cputm; + uint32_t todpr; +@@ -117,6 +113,10 @@ struct CPUS390XState { + struct {} start_normal_reset_fields; + uint8_t riccb[64]; /* runtime instrumentation control */ + ++ int pending_int; ++ uint16_t external_call_addr; ++ DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); ++ + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x.conf b/SOURCES/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/SOURCES/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. +# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. 
+# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. +# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch b/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch new file mode 100644 index 0000000..b22b2b2 --- /dev/null +++ b/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch @@ -0,0 +1,79 @@ +From 2edf1c3a2ee65ef0e4f6bd4f8de650e529f5481d Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Tue, 15 Sep 2020 15:15:49 -0400 +Subject: [PATCH] seccomp: fix killing of whole process instead of thread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20200915151549.5007-1-otubo@redhat.com> +Patchwork-id: 98392 +O-Subject: [RHEL-AV-8.3.0/RHEL-8.3.0 qemu-kvm PATCH] seccomp: fix killing of whole process instead of thread +Bugzilla: 1890885 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Daniel P. Berrange + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 +BRANCH: rhel-av-8.3.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339152 + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 +BRANCH: rhel-8.3.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339111 + +From: Daniel P. 
Berrangé + +Back in 2018 we introduced support for killing the whole QEMU process +instead of just one thread, when a seccomp rule is violated: + + commit bda08a5764d470f101fa38635d30b41179a313e1 + Author: Marc-André Lureau + Date: Wed Aug 22 19:02:48 2018 +0200 + + seccomp: prefer SCMP_ACT_KILL_PROCESS if available + +Fast forward a year and we introduced a patch to avoid killing the +process for resource control syscalls tickled by Mesa. + + commit 9a1565a03b79d80b236bc7cc2dbce52a2ef3a1b8 + Author: Daniel P. Berrangé + Date: Wed Mar 13 09:49:03 2019 +0000 + + seccomp: don't kill process for resource control syscalls + +Unfortunately a logic bug effectively reverted the first commit +mentioned so that we go back to only killing the thread, not the whole +process. + +Signed-off-by: Daniel P. Berrangé +Reviewed-by: Stefan Hajnoczi +Acked-by: Eduardo Otubo +(cherry picked from commit e474e3aacf4276eb0781d11c45e2fab996f9dc56) +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. de Paula +--- + qemu-seccomp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index e0a1829b3dd..8325ecb766e 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -136,8 +136,9 @@ static uint32_t qemu_seccomp_get_action(int set) + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { + kill_process = 1; ++ } else { ++ kill_process = 0; + } +- kill_process = 0; + } + if (kill_process == 1) { + return SCMP_ACT_KILL_PROCESS; +-- +2.27.0 + diff --git a/SOURCES/kvm-setup b/SOURCES/kvm-setup new file mode 100644 index 0000000..3bfedf6 --- /dev/null +++ b/SOURCES/kvm-setup @@ -0,0 +1,49 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HV module + if ! 
modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applies to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER8. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; + s390x) + kvm_setup_s390x + ;; +esac + +exit 0 diff --git a/SOURCES/kvm-setup.service b/SOURCES/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/SOURCES/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch new file mode 100644 index 0000000..6d8dfe1 --- /dev/null +++ b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch @@ -0,0 +1,77 @@ +From 0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:57 +0100 +Subject: [PATCH 2/4] slirp: use correct size while 
emulating IRC commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> +Patchwork-id: 93400 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size +'m->m_size' to write DCC commands via snprintf(3). This may +lead to OOB write access, because 'bptr' points somewhere in +the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) +size to avoid OOB access. + +Reported-by: Vishnu Dev TJ +Signed-off-by: Prasad J Pandit +Reviewed-by: Samuel Thibault +Message-Id: <20200109094228.79764-2-ppandit@redhat.com> + +(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cbecd64..cedbfb2 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, +@@ -788,8 +789,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, 
M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, +@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch new file mode 100644 index 0000000..fe42f4f --- /dev/null +++ b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch @@ -0,0 +1,71 @@ +From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:58 +0100 +Subject: [PATCH 3/4] slirp: use correct size while emulating commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> +Patchwork-id: 93401 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating services in tcp_emu(), it uses 'mbuf' size +'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) +size to avoid possible OOB access. 
+ +Signed-off-by: Prasad J Pandit +Signed-off-by: Samuel Thibault +Message-Id: <20200109094228.79764-3-ppandit@redhat.com> + +(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) +Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cedbfb2..954d1a6 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, + n5, n6, x == 7 ? buff : ""); + return 1; +@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); + +@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = +- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = snprintf(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch new file mode 100644 index 0000000..d934712 --- /dev/null +++ b/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch @@ -0,0 +1,113 @@ +From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Fri, 3 Jan 2020 01:15:12 +0000 +Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode + changeover +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200103011512.49129-2-dgibson@redhat.com> +Patchwork-id: 93261 +O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover +Bugzilla: 1733893 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: David Gibson + +PAPR allows the interrupt controller used on a POWER9 machine (XICS or +XIVE) to be selected by the guest operating system, by using the +ibm,client-architecture-support (CAS) feature negotiation call. + +Currently, if the guest selects an interrupt controller different from the +one selected at initial boot, this causes the system to be reset with the +new model and the boot starts again. This means we run through the SLOF +boot process twice, as well as any other bootloader (e.g. grub) in use +before the OS calls CAS. This can be confusing and/or inconvenient for +users. 
+ +Thanks to two fairly recent changes, we no longer need this reboot. 1) we +now completely regenerate the device tree when CAS is called (meaning we +don't need special case updates for all the device tree changes caused by +the interrupt controller mode change), 2) we now have explicit code paths +to activate and deactivate the different interrupt controllers, rather than +just implicitly calling those at machine reset time. + +We can therefore eliminate the reboot for changing irq mode, simply by +putting a call to spapr_irq_update_active_intc() before we call +spapr_h_cas_compose_response() (which gives the updated device tree to +the guest firmware and OS). + +Signed-off-by: David Gibson +Reviewed-by: Cedric Le Goater +Reviewed-by: Greg Kurz +(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- + 1 file changed, 13 insertions(+), 20 deletions(-) + +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 140f05c..05a7ca2 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); + spapr_ovec_cleanup(ov1_guest); +- if (!spapr->cas_reboot) { +- /* If spapr_machine_reset() did not set up a HPT but one is necessary +- * (because the guest isn't going to use radix) then set it up here. */ +- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { +- /* legacy hash or new hash: */ +- spapr_setup_hpt_and_vrma(spapr); +- } +- spapr->cas_reboot = +- (spapr_h_cas_compose_response(spapr, args[1], args[2], +- ov5_updates) != 0); +- } + + /* +- * Ensure the guest asks for an interrupt mode we support; otherwise +- * terminate the boot. 
++ * Ensure the guest asks for an interrupt mode we support; ++ * otherwise terminate the boot. + */ + if (guest_xive) { + if (!spapr->irq->xive) { +@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + } + +- /* +- * Generate a machine reset when we have an update of the +- * interrupt mode. Only required when the machine supports both +- * modes. +- */ ++ spapr_irq_update_active_intc(spapr); ++ + if (!spapr->cas_reboot) { +- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) +- && spapr->irq->xics && spapr->irq->xive; ++ /* If spapr_machine_reset() did not set up a HPT but one is necessary ++ * (because the guest isn't going to use radix) then set it up here. */ ++ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { ++ /* legacy hash or new hash: */ ++ spapr_setup_hpt_and_vrma(spapr); ++ } ++ spapr->cas_reboot = ++ (spapr_h_cas_compose_response(spapr, args[1], args[2], ++ ov5_updates) != 0); + } + + spapr_ovec_cleanup(ov5_updates); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch new file mode 100644 index 0000000..0aa782b --- /dev/null +++ b/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch @@ -0,0 +1,135 @@ +From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Fri, 7 Feb 2020 00:57:04 +0000 +Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in + pseries-5.0 machine + +RH-Author: David Gibson +Message-id: <20200207005704.194428-1-dgibson@redhat.com> +Patchwork-id: 93737 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine +Bugzilla: 1796240 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: David Gibson + +For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch +mitigation is 
"count cache disabled", which is configured with: + -machine cap-ibs=fixed-ccd +However, this option isn't available on DD2.3 CPUs with KVM, because they +don't have the count cache disabled. + +For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured +with: + -machine cap-ibs=workaround,cap-ccf-assist=on +However this option isn't available on DD2.2 CPUs with KVM, because they +don't have the special CCF assist instruction this relies on. + +On current machine types, we default to "count cache flush w/o assist", +that is: + -machine cap-ibs=workaround,cap-ccf-assist=off +This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a +fairly significant performance impact. + +It turns out we can do better. The special instruction that CCF assist +uses to trigger a count cache flush is a no-op on earlier CPUs, rather than +trapping or causing other badness. It doesn't, of itself, implement the +mitigation, but *if* we have count-cache-disabled, then the count cache +flush is unnecessary, and so using the count cache flush mitigation is +harmless. + +Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by +default. Along with that, suppress throwing an error if cap-ccf-assist +is selected but KVM doesn't support it, as long as KVM *is* giving us +count-cache-disabled. To allow TCG to work out of the box, even though it +doesn't implement the ccf flush assist, downgrade the error in that case to +a warning. This matches several Spectre mitigations where we allow TCG +to operate for debugging, since we don't really make guarantees about TCG +security properties anyway. + +While we're there, make the TCG warning for this case match that for other +mitigations. + +Signed-off-by: David Gibson +Tested-by: Michael Ellerman +(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) +Signed-off-by: Danilo C. L. 
de Paula + +Conflicts: + hw/ppc/spapr.c + +Adjusted machine version compatibility code to the RHEL machine types +rather than the upstream machine types. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 +Branch: rhel-av-8.2.0 +Upstream: Merged for qemu-5.0 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 4 +++- + hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- + 2 files changed, 20 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index c12862d..a330f03 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; +- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + spapr_caps_add_properties(smc, &error_abort); + smc->irq = &spapr_irq_dual; + smc->dr_phb_enabled = true; +@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + } + + DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 805f385..6e6fb28 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, + uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); + + if (tcg_enabled() && val) { +- /* TODO - for now only allow broken for TCG */ +- error_setg(errp, +-"Requested count cache flush assist capability level 
not supported by tcg," +- " try appending -machine cap-ccf-assist=off"); ++ /* TCG doesn't implement anything here, but allow with a warning */ ++ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); + } else if (kvm_enabled() && (val > kvm_val)) { ++ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); ++ ++ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { ++ /* ++ * If we don't have CCF assist on the host, the assist ++ * instruction is a harmless no-op. It won't correctly ++ * implement the cache count flush *but* if we have ++ * count-cache-disabled in the host, that flush is ++ * unnecessary. So, specifically allow this case. This ++ * allows us to have better performance on POWER9 DD2.3, ++ * while still working on POWER9 DD2.2 and POWER8 host ++ * cpus. ++ */ ++ return; ++ } + error_setg(errp, + "Requested count cache flush assist capability level not supported by kvm," + " try appending -machine cap-ccf-assist=off"); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch b/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch new file mode 100644 index 0000000..7c48718 --- /dev/null +++ b/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch @@ -0,0 +1,213 @@ +From 5aea41b56f07f586e0f56a5c8b3e8443e485cd77 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Jun 2020 07:41:09 -0400 +Subject: [PATCH 39/42] spapr: Pass the maximum number of vCPUs to the KVM + interrupt controller +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200605074111.2185-2-thuth@redhat.com> +Patchwork-id: 97368 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/3] spapr: Pass the maximum number of vCPUs to the KVM interrupt controller +Bugzilla: 1756946 +RH-Acked-by: Greg Kurz +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Greg Kurz + +The XIVE and XICS-on-XIVE KVM devices on POWER9 hosts can
greatly reduce +their consumption of some scarce HW resources, namely Virtual Presenter +identifiers, if they know the maximum number of vCPUs that may run in the +VM. + +Prepare ground for this by passing the value down to xics_kvm_connect() +and kvmppc_xive_connect(). This is purely mechanical, no functional +change. + +Signed-off-by: Greg Kurz +Message-Id: <157478678301.67101.2717368060417156338.stgit@bahia.tlslab.ibm.com> +Reviewed-by: Cédric Le Goater +Signed-off-by: David Gibson +(cherry picked from commit 4ffb7496881ec361deaf1f51c41a933bde3cbf7b) +Signed-off-by: Danilo C. L. de Paula +--- + hw/intc/spapr_xive.c | 6 ++++-- + hw/intc/spapr_xive_kvm.c | 3 ++- + hw/intc/xics_kvm.c | 3 ++- + hw/intc/xics_spapr.c | 5 +++-- + hw/ppc/spapr_irq.c | 8 +++++--- + include/hw/ppc/spapr_irq.h | 10 ++++++++-- + include/hw/ppc/spapr_xive.h | 3 ++- + include/hw/ppc/xics_spapr.h | 3 ++- + 8 files changed, 28 insertions(+), 13 deletions(-) + +diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c +index 9cb8d38a3b..a570e6e90a 100644 +--- a/hw/intc/spapr_xive.c ++++ b/hw/intc/spapr_xive.c +@@ -651,12 +651,14 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers, + plat_res_int_priorities, sizeof(plat_res_int_priorities))); + } + +-static int spapr_xive_activate(SpaprInterruptController *intc, Error **errp) ++static int spapr_xive_activate(SpaprInterruptController *intc, ++ uint32_t nr_servers, Error **errp) + { + SpaprXive *xive = SPAPR_XIVE(intc); + + if (kvm_enabled()) { +- int rc = spapr_irq_init_kvm(kvmppc_xive_connect, intc, errp); ++ int rc = spapr_irq_init_kvm(kvmppc_xive_connect, intc, nr_servers, ++ errp); + if (rc < 0) { + return rc; + } +diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c +index 08012ac7cd..c1c837a764 100644 +--- a/hw/intc/spapr_xive_kvm.c ++++ b/hw/intc/spapr_xive_kvm.c +@@ -740,7 +740,8 @@ static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, + * All the XIVE memory regions are now backed by 
mappings from the KVM + * XIVE device. + */ +-int kvmppc_xive_connect(SpaprInterruptController *intc, Error **errp) ++int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, ++ Error **errp) + { + SpaprXive *xive = SPAPR_XIVE(intc); + XiveSource *xsrc = &xive->source; +diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c +index 954c424b36..a1f1b7b0d3 100644 +--- a/hw/intc/xics_kvm.c ++++ b/hw/intc/xics_kvm.c +@@ -342,7 +342,8 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val) + } + } + +-int xics_kvm_connect(SpaprInterruptController *intc, Error **errp) ++int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers, ++ Error **errp) + { + ICSState *ics = ICS_SPAPR(intc); + int rc; +diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c +index b3705dab0e..8ae4f41459 100644 +--- a/hw/intc/xics_spapr.c ++++ b/hw/intc/xics_spapr.c +@@ -422,10 +422,11 @@ static int xics_spapr_post_load(SpaprInterruptController *intc, int version_id) + return 0; + } + +-static int xics_spapr_activate(SpaprInterruptController *intc, Error **errp) ++static int xics_spapr_activate(SpaprInterruptController *intc, ++ uint32_t nr_servers, Error **errp) + { + if (kvm_enabled()) { +- return spapr_irq_init_kvm(xics_kvm_connect, intc, errp); ++ return spapr_irq_init_kvm(xics_kvm_connect, intc, nr_servers, errp); + } + return 0; + } +diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c +index d6bb7fd2d6..9da423658a 100644 +--- a/hw/ppc/spapr_irq.c ++++ b/hw/ppc/spapr_irq.c +@@ -70,15 +70,16 @@ void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); + } + +-int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **), ++int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + SpaprInterruptController *intc, ++ uint32_t nr_servers, + Error **errp) + { + MachineState *machine = MACHINE(qdev_get_machine()); + Error *local_err = NULL; + + if (kvm_enabled() && 
machine_kernel_irqchip_allowed(machine)) { +- if (fn(intc, &local_err) < 0) { ++ if (fn(intc, nr_servers, &local_err) < 0) { + if (machine_kernel_irqchip_required(machine)) { + error_prepend(&local_err, + "kernel_irqchip requested but unavailable: "); +@@ -495,6 +496,7 @@ static void set_active_intc(SpaprMachineState *spapr, + SpaprInterruptController *new_intc) + { + SpaprInterruptControllerClass *sicc; ++ uint32_t nr_servers = spapr_max_server_number(spapr); + + assert(new_intc); + +@@ -512,7 +514,7 @@ static void set_active_intc(SpaprMachineState *spapr, + + sicc = SPAPR_INTC_GET_CLASS(new_intc); + if (sicc->activate) { +- sicc->activate(new_intc, &error_fatal); ++ sicc->activate(new_intc, nr_servers, &error_fatal); + } + + spapr->active_intc = new_intc; +diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h +index ff814d13de..ca8cb44213 100644 +--- a/include/hw/ppc/spapr_irq.h ++++ b/include/hw/ppc/spapr_irq.h +@@ -43,7 +43,8 @@ typedef struct SpaprInterruptController SpaprInterruptController; + typedef struct SpaprInterruptControllerClass { + InterfaceClass parent; + +- int (*activate)(SpaprInterruptController *intc, Error **errp); ++ int (*activate)(SpaprInterruptController *intc, uint32_t nr_servers, ++ Error **errp); + void (*deactivate)(SpaprInterruptController *intc); + + /* +@@ -98,8 +99,13 @@ qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq); + int spapr_irq_post_load(SpaprMachineState *spapr, int version_id); + void spapr_irq_reset(SpaprMachineState *spapr, Error **errp); + int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp); +-int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **), ++ ++typedef int (*SpaprInterruptControllerInitKvm)(SpaprInterruptController *, ++ uint32_t, Error **); ++ ++int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + SpaprInterruptController *intc, ++ uint32_t nr_servers, + Error **errp); + + /* +diff --git a/include/hw/ppc/spapr_xive.h 
b/include/hw/ppc/spapr_xive.h +index 742b7e834f..3a103c224d 100644 +--- a/include/hw/ppc/spapr_xive.h ++++ b/include/hw/ppc/spapr_xive.h +@@ -66,7 +66,8 @@ int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, + /* + * KVM XIVE device helpers + */ +-int kvmppc_xive_connect(SpaprInterruptController *intc, Error **errp); ++int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, ++ Error **errp); + void kvmppc_xive_disconnect(SpaprInterruptController *intc); + void kvmppc_xive_reset(SpaprXive *xive, Error **errp); + void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, +diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h +index 28b87038c8..1c65c96e3c 100644 +--- a/include/hw/ppc/xics_spapr.h ++++ b/include/hw/ppc/xics_spapr.h +@@ -32,7 +32,8 @@ + #define TYPE_ICS_SPAPR "ics-spapr" + #define ICS_SPAPR(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SPAPR) + +-int xics_kvm_connect(SpaprInterruptController *intc, Error **errp); ++int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers, ++ Error **errp); + void xics_kvm_disconnect(SpaprInterruptController *intc); + bool xics_kvm_has_broken_disconnect(SpaprMachineState *spapr); + +-- +2.27.0 + diff --git a/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch b/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch new file mode 100644 index 0000000..0e08184 --- /dev/null +++ b/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch @@ -0,0 +1,65 @@ +From b8c8288a65146952cdfe7d5f0cd96734c9de8ee1 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 7 May 2020 17:57:08 +0100 +Subject: [PATCH 1/7] target/arm: Fix PAuth sbox functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200507175708.1165177-2-jmaloy@redhat.com> +Patchwork-id: 96341 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] target/arm: Fix PAuth sbox functions +Bugzilla: 1813940 +RH-Acked-by: 
Andrew Jones +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +From: Vincent Dehors + +In the PAC computation, sbox was applied over wrong bits. +As this is a 4-bit sbox, bit index should be incremented by 4 instead of 16. + +Test vector from QARMA paper (https://eprint.iacr.org/2016/444.pdf) was +used to verify one computation of the pauth_computepac() function which +uses sbox2. + +Launchpad: https://bugs.launchpad.net/bugs/1859713 +Reviewed-by: Richard Henderson +Signed-off-by: Vincent DEHORS +Signed-off-by: Adrien GRASSEIN +Message-id: 20200116230809.19078-2-richard.henderson@linaro.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit de0b1bae6461f67243282555475f88b2384a1eb9) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/pauth_helper.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c +index d3194f2..0a5f41e 100644 +--- a/target/arm/pauth_helper.c ++++ b/target/arm/pauth_helper.c +@@ -89,7 +89,7 @@ static uint64_t pac_sub(uint64_t i) + uint64_t o = 0; + int b; + +- for (b = 0; b < 64; b += 16) { ++ for (b = 0; b < 64; b += 4) { + o |= (uint64_t)sub[(i >> b) & 0xf] << b; + } + return o; +@@ -104,7 +104,7 @@ static uint64_t pac_inv_sub(uint64_t i) + uint64_t o = 0; + int b; + +- for (b = 0; b < 64; b += 16) { ++ for (b = 0; b < 64; b += 4) { + o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b; + } + return o; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch new file mode 100644 index 0000000..febea10 --- /dev/null +++ b/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch @@ -0,0 +1,298 @@ +From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 24 Jan 2020 09:14:34 +0100 +Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes + +RH-Author: Andrew Jones 
+Message-id: <20200124091434.15021-2-drjones@redhat.com> +Patchwork-id: 93443 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes +Bugzilla: 1725084 +RH-Acked-by: Auger Eric +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 + +Author: Andrew Jones +Date: Thu, 23 Jan 2020 15:22:40 +0000 + + target/arm/arch_dump: Add SVE notes + + When dumping a guest with dump-guest-memory also dump the SVE + registers if they are in use. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101832.18781-1-drjones@redhat.com + [PMM: fixed checkpatch nits] + Signed-off-by: Peter Maydell + +(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) +Signed-off-by: Miroslav Rezanina +--- + include/elf.h | 1 + + target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- + target/arm/cpu.h | 25 ++++++++++ + target/arm/kvm64.c | 24 ---------- + 4 files changed, 148 insertions(+), 26 deletions(-) + +diff --git a/include/elf.h b/include/elf.h +index 3501e0c..8fbfe60 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { + #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ + #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ + #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ ++#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ + + /* + * Physical entry point into the kernel. 
+diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 26a2c09..2345dec 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { + + QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); + ++/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ ++struct aarch64_user_sve_header { ++ uint32_t size; ++ uint32_t max_size; ++ uint16_t vl; ++ uint16_t max_vl; ++ uint16_t flags; ++ uint16_t reserved; ++} QEMU_PACKED; ++ + struct aarch64_note { + Elf64_Nhdr hdr; + char name[8]; /* align_up(sizeof("CORE"), 4) */ + union { + struct aarch64_elf_prstatus prstatus; + struct aarch64_user_vfp_state vfp; ++ struct aarch64_user_sve_header sve; + }; + } QEMU_PACKED; + +@@ -76,6 +87,8 @@ struct aarch64_note { + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) + #define AARCH64_PRFPREG_NOTE_SIZE \ + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) ++#define AARCH64_SVE_NOTE_SIZE(env) \ ++ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) + + static void aarch64_note_init(struct aarch64_note *note, DumpState *s, + const char *name, Elf64_Word namesz, +@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, + return 0; + } + ++#ifdef TARGET_AARCH64 ++static off_t sve_zreg_offset(uint32_t vq, int n) ++{ ++ off_t off = sizeof(struct aarch64_user_sve_header); ++ return ROUND_UP(off, 16) + vq * 16 * n; ++} ++ ++static off_t sve_preg_offset(uint32_t vq, int n) ++{ ++ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; ++} ++ ++static off_t sve_fpsr_offset(uint32_t vq) ++{ ++ off_t off = sve_preg_offset(vq, 17); ++ return ROUND_UP(off, 16); ++} ++ ++static off_t sve_fpcr_offset(uint32_t vq) ++{ ++ return sve_fpsr_offset(vq) + sizeof(uint32_t); ++} ++ ++static uint32_t sve_current_vq(CPUARMState *env) ++{ ++ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; ++} ++ ++static size_t sve_size_vq(uint32_t vq) ++{ ++ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); ++ return ROUND_UP(off, 16); ++} ++ ++static size_t sve_size(CPUARMState *env) ++{ ++ return sve_size_vq(sve_current_vq(env)); ++} ++ ++static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, ++ CPUARMState *env, int cpuid, ++ DumpState *s) ++{ ++ struct aarch64_note *note; ++ ARMCPU *cpu = env_archcpu(env); ++ uint32_t vq = sve_current_vq(env); ++ uint64_t tmp[ARM_MAX_VQ * 2], *r; ++ uint32_t fpr; ++ uint8_t *buf; ++ int ret, i; ++ ++ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); ++ buf = (uint8_t *)&note->sve; ++ ++ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); ++ ++ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); ++ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); ++ note->sve.vl = cpu_to_dump16(s, vq * 16); ++ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); ++ note->sve.flags = cpu_to_dump16(s, 1); ++ ++ for (i = 0; i < 32; ++i) { ++ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); ++ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); ++ } ++ ++ for (i = 0; i < 17; ++i) { ++ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], ++ DIV_ROUND_UP(vq * 2, 8)); ++ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); ++ } ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); ++ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); ++ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); ++ g_free(note); ++ ++ if (ret < 0) { ++ return -1; ++ } ++ ++ return 0; ++} ++#endif ++ + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque) + { + struct aarch64_note note; +- CPUARMState *env = &ARM_CPU(cs)->env; ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; + DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; +@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return -1;
+ } + +- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ if (ret) { ++ return ret; ++ } ++ ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ ret = aarch64_write_elf64_sve(f, env, cpuid, s); ++ } ++#endif ++ ++ return ret; + } + + /* struct pt_regs from arch/arm/include/asm/ptrace.h */ +@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + if (class == ELFCLASS64) { + note_size = AARCH64_PRSTATUS_NOTE_SIZE; + note_size += AARCH64_PRFPREG_NOTE_SIZE; ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ note_size += AARCH64_SVE_NOTE_SIZE(env); ++ } ++#endif + } else { + note_size = ARM_PRSTATUS_NOTE_SIZE; + if (arm_feature(env, ARM_FEATURE_VFP)) { +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 83a809d..82dd3cc 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); + void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64); + void aarch64_add_sve_properties(Object *obj); ++ ++/* ++ * SVE registers are encoded in KVM's memory in an endianness-invariant format. ++ * The byte at offset i from the start of the in-memory representation contains ++ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the ++ * lowest offsets are stored in the lowest memory addresses, then that nearly ++ * matches QEMU's representation, which is to use an array of host-endian ++ * uint64_t's, where the lower offsets are at the lower indices. To complete ++ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
++ */ ++static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) ++{ ++#ifdef HOST_WORDS_BIGENDIAN ++ int i; ++ ++ for (i = 0; i < nr; ++i) { ++ dst[i] = bswap64(src[i]); ++ } ++ ++ return dst; ++#else ++ return src; ++#endif ++} ++ + #else + static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } + static inline void aarch64_sve_change_el(CPUARMState *env, int o, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 876184b..e2da756 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) + } + + /* +- * SVE registers are encoded in KVM's memory in an endianness-invariant format. +- * The byte at offset i from the start of the in-memory representation contains +- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the +- * lowest offsets are stored in the lowest memory addresses, then that nearly +- * matches QEMU's representation, which is to use an array of host-endian +- * uint64_t's, where the lower offsets are at the lower indices. To complete +- * the translation we just need to byte swap the uint64_t's on big-endian hosts. +- */ +-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) +-{ +-#ifdef HOST_WORDS_BIGENDIAN +- int i; +- +- for (i = 0; i < nr; ++i) { +- dst[i] = bswap64(src[i]); +- } +- +- return dst; +-#else +- return src; +-#endif +-} +- +-/* + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits + * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard + * code the slice index to zero for now as it's unlikely we'll need more than +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch new file mode 100644 index 0000000..601b8c4 --- /dev/null +++ b/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch @@ -0,0 +1,281 @@ +From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:14 +0000 +Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-6-drjones@redhat.com> +Patchwork-id: 93623 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/cpu: Add the kvm-no-adjvtime CPU property + + kvm-no-adjvtime is a KVM specific CPU property and a first of its + kind. To accommodate it we also add kvm_arm_add_vcpu_properties() + and a KVM specific CPU properties description to the CPU features + document. + + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-7-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + Dropped the second hunk of the hw/arm/virt.c changes + as they would patch dead code. + +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 5 +++++ + include/hw/arm/virt.h | 1 + + target/arm/cpu.c | 2 ++ + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 11 +++++++++++ + target/arm/monitor.c | 1 + + tests/arm-cpu-features.c | 4 ++++ + 9 files changed, 89 insertions(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 1b367e2..45d1eb6 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain + configurations. For example, the `aarch64` CPU feature, which, when + disabled, enables the optional AArch32 CPU feature, is only supported + when using the KVM accelerator and when running on a host CPU type that +-supports the feature. ++supports the feature. While `aarch64` currently only works with KVM, ++it could work with TCG. CPU features that are specific to KVM are ++prefixed with "kvm-" and are described in "KVM VCPU Features". + + CPU Feature Probing + =================== +@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU + properties have special semantics (see "SVE CPU Property Parsing + Semantics"). + ++KVM VCPU Features ++================= ++ ++KVM VCPU features are CPU features that are specific to KVM, such as ++paravirt features or features that enable CPU virtualization extensions. ++The features' CPU properties are only available when KVM is enabled and ++are named with the prefix "kvm-". KVM VCPU features may be probed, ++enabled, and disabled in the same way as other CPU features. Below is ++the list of KVM VCPU features and their descriptions. ++ ++ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This ++ means that by default the virtual time ++ adjustment is enabled (vtime is ++ adjusted).
++ ++ When virtual time adjustment is enabled each ++ time the VM transitions back to running state ++ the VCPU's virtual counter is updated to ensure ++ stopped time is not counted. This avoids time ++ jumps surprising guest OSes and applications, ++ as long as they use the virtual counter for ++ timekeeping. However it has the side effect of ++ the virtual and physical counters diverging. ++ All timekeeping based on the virtual counter ++ will appear to lag behind any timekeeping that ++ does not subtract VM stopped time. The guest ++ may resynchronize its virtual counter with ++ other time sources as needed. ++ ++ Enable kvm-no-adjvtime to disable virtual time ++ adjustment, also restoring the legacy (pre-5.0) ++ behavior. ++ + SVE CPU Properties + ================== + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e108391..d30d38c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) + } + } + ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { ++ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); ++ } ++ + if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { + object_property_set_bool(cpuobj, false, "pmu", NULL); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 53fdf16..77828ce 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -109,6 +109,7 @@ typedef struct { + bool smbios_old_sys_ver; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ ++ bool kvm_no_adjvtime; + } VirtMachineClass; + + typedef struct { +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 3788fc3..e46efe9 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + cortex_a15_initfn(obj); + +@@ 
-2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + aarch64_add_sve_properties(obj); + } ++ kvm_arm_add_vcpu_properties(obj); + arm_cpu_post_init(obj); + } + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index a39d6fc..3cd416d 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + uint64_t t; + uint32_t u; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 26d7f8b..4be9497 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -17,6 +17,8 @@ + #include "qemu/timer.h" + #include "qemu/error-report.h" + #include "qemu/main-loop.h" ++#include "qom/object.h" ++#include "qapi/error.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" +@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + env->features = arm_host_cpu_features.features; + } + ++static bool kvm_no_adjvtime_get(Object *obj, Error **errp) ++{ ++ return !ARM_CPU(obj)->kvm_adjvtime; ++} ++ ++static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) ++{ ++ ARM_CPU(obj)->kvm_adjvtime = !value; ++} ++ ++/* KVM VCPU properties should be prefixed with "kvm-". */ ++void kvm_arm_add_vcpu_properties(Object *obj) ++{ ++ if (!kvm_enabled()) { ++ return; ++ } ++ ++ ARM_CPU(obj)->kvm_adjvtime = true; ++ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, ++ kvm_no_adjvtime_set, &error_abort); ++ object_property_set_description(obj, "kvm-no-adjvtime", ++ "Set on to disable the adjustment of " ++ "the virtual counter. 
VM stopped time " ++ "will be counted.", &error_abort); ++} ++ + bool kvm_arm_pmu_supported(CPUState *cpu) + { + KVMState *s = KVM_STATE(current_machine->accelerator); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 01a9a18..ae9e075 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + + /** ++ * kvm_arm_add_vcpu_properties: ++ * @obj: The CPU object to add the properties to ++ * ++ * Add all KVM specific CPU properties to the CPU object. These ++ * are the CPU properties with "kvm-" prefixed names. ++ */ ++void kvm_arm_add_vcpu_properties(Object *obj); ++ ++/** + * kvm_arm_aarch32_supported: + * @cs: CPUState + * +@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + cpu->host_cpu_probe_failed = true; + } + ++static inline void kvm_arm_add_vcpu_properties(Object *obj) {} ++ + static inline bool kvm_arm_aarch32_supported(CPUState *cs) + { + return false; +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index fa054f8..9725dff 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { + "sve128", "sve256", "sve384", "sve512", + "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", + "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", ++ "kvm-no-adjvtime", + NULL + }; + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 89285ca..ba1a6fe 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + ++ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + 
assert_has_feature_enabled(qts, "max", "aarch64"); + assert_has_feature_enabled(qts, "max", "sve"); +@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + return; + } + ++ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + bool kvm_supports_sve; + char max_name[8], name[8]; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch new file mode 100644 index 0000000..3396a32 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch @@ -0,0 +1,330 @@ +From 5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:13 +0000 +Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-5-drjones@redhat.com> +Patchwork-id: 93622 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm: Implement virtual time adjustment + + When a VM is stopped (such as when it's paused) guest virtual time + should stop counting. Otherwise, when the VM is resumed it will + experience time jumps and its kernel may report soft lockups. Not + counting virtual time while the VM is stopped has the side effect + of making the guest's time appear to lag when compared with real + time, and even with time derived from the physical counter. 
For + this reason, this change, which is enabled by default, comes with + a KVM CPU feature allowing it to be disabled, restoring legacy + behavior. + + This patch only provides the implementation of the virtual time + adjustment. A subsequent patch will provide the CPU property + allowing the change to be enabled and disabled. + + Reported-by: Bijan Mottahedeh + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-6-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/cpu.h | 7 ++++ + target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm32.c | 3 ++ + target/arm/kvm64.c | 3 ++ + target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ + target/arm/machine.c | 7 ++++ + 6 files changed, 150 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 82dd3cc..fbd8ea0 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -821,6 +821,13 @@ struct ARMCPU { + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + ++ /* KVM CPU state */ ++ ++ /* KVM virtual time adjustment */ ++ bool kvm_adjvtime; ++ bool kvm_vtime_dirty; ++ uint64_t kvm_vtime; ++ + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 5b82cef..26d7f8b 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) + return 0; + } + ++/* ++ * cpreg_values are sorted in ascending order by KVM register ID ++ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find ++ * the storage for a KVM register by ID with a binary search. 
++ */ ++static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) ++{ ++ uint64_t *res; ++ ++ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, ++ sizeof(uint64_t), compare_u64); ++ assert(res); ++ ++ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; ++} ++ + /* Initialize the ARMCPU cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list). +@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) + return ok; + } + ++void kvm_arm_cpu_pre_save(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_vtime_dirty) { ++ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; ++ } ++} ++ ++void kvm_arm_cpu_post_load(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_adjvtime) { ++ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); ++ cpu->kvm_vtime_dirty = true; ++ } ++} ++ + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; +@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) + return 0; + } + ++void kvm_arm_get_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = true; ++} ++ ++void kvm_arm_put_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (!cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = false; ++} ++ + int 
kvm_put_vcpu_events(ARMCPU *cpu) + { + CPUARMState *env = &cpu->env; +@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) + return MEMTXATTRS_UNSPECIFIED; + } + ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state) ++{ ++ CPUState *cs = opaque; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ if (running) { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_put_virtual_time(cs); ++ } ++ } else { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_get_virtual_time(cs); ++ } ++ } ++} + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 32bf8d6..3a8b437 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -16,6 +16,7 @@ + #include "qemu-common.h" + #include "cpu.h" + #include "qemu/timer.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "kvm_arm.h" + #include "internals.h" +@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 666a81a..d368189 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -23,6 +23,7 @@ + #include "qemu/host-utils.h" + #include "qemu/main-loop.h" + #include "exec/gdbstub.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" + #include "kvm_arm.h" +@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index b48a9c9..01a9a18 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h 
+@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); + bool write_kvmstate_to_list(ARMCPU *cpu); + + /** ++ * kvm_arm_cpu_pre_save: ++ * @cpu: ARMCPU ++ * ++ * Called after write_kvmstate_to_list() from cpu_pre_save() to update ++ * the cpreg list with KVM CPU state. ++ */ ++void kvm_arm_cpu_pre_save(ARMCPU *cpu); ++ ++/** ++ * kvm_arm_cpu_post_load: ++ * @cpu: ARMCPU ++ * ++ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. ++ */ ++void kvm_arm_cpu_post_load(ARMCPU *cpu); ++ ++/** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU + * +@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + ++/** ++ * kvm_arm_get_virtual_time: ++ * @cs: CPUState ++ * ++ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. ++ */ ++void kvm_arm_get_virtual_time(CPUState *cs); ++ ++/** ++ * kvm_arm_put_virtual_time: ++ * @cs: CPUState ++ * ++ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
++ */ ++void kvm_arm_put_virtual_time(CPUState *cs); ++ ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state); ++ + int kvm_arm_vgic_probe(void); + + void kvm_arm_pmu_set_irq(CPUState *cs, int irq); +@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} + static inline void kvm_arm_pmu_init(CPUState *cs) {} + + static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} ++ ++static inline void kvm_arm_get_virtual_time(CPUState *cs) {} ++static inline void kvm_arm_put_virtual_time(CPUState *cs) {} + #endif + + static inline const char *gic_class_name(void) +diff --git a/target/arm/machine.c b/target/arm/machine.c +index eb28b23..241890a 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) + /* This should never fail */ + abort(); + } ++ ++ /* ++ * kvm_arm_cpu_pre_save() must be called after ++ * write_kvmstate_to_list() ++ */ ++ kvm_arm_cpu_pre_save(cpu); + } else { + if (!write_cpustate_to_list(cpu, false)) { + /* This should never fail. */ +@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) + * we're using it. 
+ */ + write_list_to_cpustate(cpu); ++ kvm_arm_cpu_post_load(cpu); + } else { + if (!write_list_to_cpustate(cpu)) { + return -1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch new file mode 100644 index 0000000..8cdc867 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch @@ -0,0 +1,197 @@ +From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:10 +0000 +Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-2-drjones@redhat.com> +Patchwork-id: 93625 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:05 +0000 + + target/arm/kvm: trivial: Clean up header documentation + + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-2-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- + 1 file changed, 27 insertions(+), 19 deletions(-) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 8e14d40..b48a9c9 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -28,9 +28,9 @@ + int kvm_arm_vcpu_init(CPUState *cs); + + /** +- * kvm_arm_vcpu_finalize ++ * kvm_arm_vcpu_finalize: + * @cs: CPUState +- * @feature: int ++ * @feature: feature to finalize + * + * Finalizes the configuration of the specified VCPU feature by + * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring +@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + int kvm_arm_init_cpreg_list(ARMCPU *cpu); + + /** +- * kvm_arm_reg_syncs_via_cpreg_list +- * regidx: KVM register index ++ * kvm_arm_reg_syncs_via_cpreg_list: ++ * @regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized +@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); + bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); + + /** +- * kvm_arm_cpreg_level +- * regidx: KVM register index ++ * kvm_arm_cpreg_level: ++ * @regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. +@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. ++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_get_vcpu_events(ARMCPU *cpu); + +@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. 
++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_put_vcpu_events(ARMCPU *cpu); + +@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { + + /** + * kvm_arm_get_host_cpu_features: +- * @ahcc: ARMHostCPUClass to fill in ++ * @ahcf: ARMHostCPUClass to fill in + * + * Probe the capabilities of the host kernel's preferred CPU and fill + * in the ARMHostCPUClass struct accordingly. ++ * ++ * Returns true on success and false otherwise. + */ + bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + +@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + bool kvm_arm_aarch32_supported(CPUState *cs); + + /** +- * bool kvm_arm_pmu_supported: ++ * kvm_arm_pmu_supported: + * @cs: CPUState + * + * Returns: true if the KVM VCPU can enable its PMU +@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); + bool kvm_arm_pmu_supported(CPUState *cs); + + /** +- * bool kvm_arm_sve_supported: ++ * kvm_arm_sve_supported: + * @cs: CPUState + * + * Returns true if the KVM VCPU can enable SVE and false otherwise. +@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); + bool kvm_arm_sve_supported(CPUState *cs); + + /** +- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the +- * IPA address space supported by KVM +- * ++ * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle ++ * ++ * Returns the number of bits in the IPA address space supported by KVM + */ + int kvm_arm_get_max_vm_ipa_size(MachineState *ms); + + /** +- * kvm_arm_sync_mpstate_to_kvm ++ * kvm_arm_sync_mpstate_to_kvm: + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on QEMU's model. ++ * ++ * Returns 0 on success and -1 on failure. + */ + int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + + /** +- * kvm_arm_sync_mpstate_to_qemu ++ * kvm_arm_sync_mpstate_to_qemu: + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMU's model. ++ * ++ * Returns 0 on success and aborts on failure. 
+ */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + +@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + + static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + { +- /* This should never actually be called in the "not KVM" case, ++ /* ++ * This should never actually be called in the "not KVM" case, + * but set up the fields to indicate an error anyway. + */ + cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; +@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); + * + * Return: TRUE if any hardware breakpoints in use. + */ +- + bool kvm_arm_hw_debug_active(CPUState *cs); + + /** + * kvm_arm_copy_hw_debug_data: +- * + * @ptr: kvm_guest_debug_arch structure + * + * Copy the architecture specific debug registers into the + * kvm_guest_debug ioctl structure. + */ + struct kvm_guest_debug_arch; +- + void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); + + /** +- * its_class_name ++ * its_class_name: + * + * Return the ITS class name to use depending on whether KVM acceleration + * and KVM CAP_SIGNAL_MSI are supported +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch new file mode 100644 index 0000000..36c0f1a --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch @@ -0,0 +1,60 @@ +From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:11 +0000 +Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-3-drjones@redhat.com> +Patchwork-id: 93621 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers +Bugzilla: 1647366 +RH-Acked-by: Philippe 
Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm64: kvm64 cpus have timer registers + + Add the missing GENERIC_TIMER feature to kvm64 cpus. + + We don't currently use these registers when KVM is enabled, but it's + probably best we add the feature flag for consistency and potential + future use. There's also precedent, as we add the PMU feature flag to + KVM enabled guests, even though we don't use those registers either. + + This change was originally posted as a hunk of a different, never + merged patch from Bijan Mottahedeh. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-4-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/kvm64.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index e2da756..666a81a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + set_feature(&features, ARM_FEATURE_NEON); + set_feature(&features, ARM_FEATURE_AARCH64); + set_feature(&features, ARM_FEATURE_PMU); ++ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + + ahcf->features = features; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch new file mode 100644 index 0000000..55f328d --- /dev/null +++ b/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch @@ -0,0 +1,81 @@ +From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:57 +0000 +Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion crashed 
+ qemu when using machine type none +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-2-drjones@redhat.com> +Patchwork-id: 93773 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Liang Yan +Date: Fri, 07 Feb 2020 14:04:21 +0000 + + target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none + + Commit e19afd566781 mentioned that target-arm only supports queryable + cpu models 'max', 'host', and the current type when KVM is in use. + The logic works well until using machine type none. + + For machine type none, cpu_type will be null if cpu option is not + set by command line, strlen(cpu_type) will terminate process. + So We add a check above it. + + This won't affect i386 and s390x since they do not use current_cpu. + + Signed-off-by: Liang Yan + Message-id: 20200203134251.12986-1-lyan@suse.com + Reviewed-by: Andrew Jones + Tested-by: Andrew Jones + Signed-off-by: Peter Maydell + +(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/monitor.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index 9725dff..c2dc790 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + } + + if (kvm_enabled()) { +- const char *cpu_type = current_machine->cpu_type; +- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); + bool supported = false; + + if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { + /* These are kvmarm's recommended cpu types */ + supported = true; +- } else if (strlen(model->name) == len && +- !strncmp(model->name, cpu_type, len)) { +- /* KVM is enabled and we're using this type, so it works. */ +- supported = true; ++ } else if (current_machine->cpu_type) { ++ const char *cpu_type = current_machine->cpu_type; ++ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); ++ ++ if (strlen(model->name) == len && ++ !strncmp(model->name, cpu_type, len)) { ++ /* KVM is enabled and we're using this type, so it works. 
*/ ++ supported = true; ++ } + } + if (!supported) { + error_setg(errp, "We cannot guarantee the CPU type '%s' works " +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch b/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch new file mode 100644 index 0000000..ffb6ab7 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch @@ -0,0 +1,83 @@ +From 4c9201a83e3ff48d2a55e45a34eb27966a1e4ab0 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 5 Jun 2020 18:37:33 -0400 +Subject: [PATCH 3/3] target/i386: Add ARCH_CAPABILITIES related bits into + Icelake-Server CPU model +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: plai@redhat.com +Message-id: <20200605183733.8269-1-plai@redhat.com> +Patchwork-id: 97380 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model +Bugzilla: 1840342 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Danilo de Paula +RH-Acked-by: Eduardo Habkost + +From: Xiaoyao Li + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1840342 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28983822 +Branch: rhel-av-8.2.1 + +Tested on HOST: intel-whitley-09.khw1.lab.eng.bos.redhat.com + +1. qemu-kvm -cpu host … + VM guest does have arch_capabilities in cpuinfo/flags. + [Expected success] + +2. qemu-kvm -cpu Icelake-Server … + VM guest does NOT have arch_capabilities in cpuinfo/flags. + [Expected failure] + +3. qemu-kvm -cpu Icelake-Server-v3 … + VM guest does have arch_capabilities in cpuinfo/flags. + [Expected success] + +--- + +Current Icelake-Server CPU model lacks all the features enumerated by +MSR_IA32_ARCH_CAPABILITIES. + +Add them, so that guest of "Icelake-Server" can see all of them. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20200316095605.12318-1-xiaoyao.li@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit d965dc35592d24c0c1519f1c566223c6277cb80e) +Signed-off-by: Paul Lai +Signed-off-by: Eduardo Lima (Etrunko) +--- + target/i386/cpu.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index b763adcdc5..7d7b016bb7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3496,6 +3496,19 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 3, ++ .props = (PropValue[]) { ++ { "arch-capabilities", "on" }, ++ { "rdctl-no", "on" }, ++ { "ibrs-all", "on" }, ++ { "skip-l1dfl-vmentry", "on" }, ++ { "mds-no", "on" }, ++ { "pschange-mc-no", "on" }, ++ { "taa-no", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch b/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch new file mode 100644 index 0000000..ef95ccf --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch @@ -0,0 +1,103 @@ +From 1ffeb321151b3878bcbb2229639456c0677305f5 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 15 May 2020 18:02:43 +0100 +Subject: [PATCH 17/17] target/i386: Add missed features to Cooperlake CPU + model + +RH-Author: plai@redhat.com +Message-id: <20200515180243.17488-5-plai@redhat.com> +Patchwork-id: 96611 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 4/4] target/i386: Add missed features to Cooperlake CPU model +Bugzilla: 1769912 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Xiaoyao Li + +It lacks VMX features and two security feature bits (disclosed recently) in +MSR_IA32_ARCH_CAPABILITIES in current Cooperlake CPU model, so add them. 
+ +Fixes: 22a866b6166d ("i386: Add new CPU model Cooperlake") +Signed-off-by: Xiaoyao Li +Message-Id: <20191225063018.20038-3-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 2dea9d9ca4ea7e9afe83d0b4153b21a16987e866) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 50 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 996a74f..b763adc 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3202,7 +3202,8 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | +- MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + /* +@@ -3217,6 +3218,54 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ 
.features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | 
VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Cooperlake)", + }, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch b/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch new file mode 100644 index 0000000..ad2dd77 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch @@ -0,0 +1,62 @@ +From 6f0630299a3edbb8f5e5ac41eb9e1f1c363f1e3e Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Tue, 9 Jun 2020 18:46:51 +0100 +Subject: [PATCH 15/17] target/i386: Add new bit definitions of + MSR_IA32_ARCH_CAPABILITIES + +RH-Author: Danilo de Paula +Message-id: <20200609184651.1328372-1-ddepaula@redhat.com> +Patchwork-id: 97489 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 5/4] target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES +Bugzilla: 1769912 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Eduardo Habkost + +From: Danilo de Paula + +redhat: builds with that series were failing. It complains about a undefined +MSR_ARCH_CAP_TAA_NO. + +The bit 6, 7 and 8 of MSR_IA32_ARCH_CAPABILITIES are recently disclosed +for some security issues. Add the definitions for them to be used by named +CPU models. + +Signed-off-by: Xiaoyao Li +Message-Id: <20191225063018.20038-2-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6c997b4adb300788d61d72e2b8bc67c03a584956) + +Signed-off-by: Paolo Bonzini +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.h | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e77d101..7bfbf2a 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -836,12 +836,15 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) + + /* MSR Feature Bits */ +-#define MSR_ARCH_CAP_RDCL_NO (1U << 0) +-#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) +-#define MSR_ARCH_CAP_RSBA (1U << 2) ++#define MSR_ARCH_CAP_RDCL_NO (1U << 0) ++#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) ++#define MSR_ARCH_CAP_RSBA (1U << 2) + #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) +-#define MSR_ARCH_CAP_SSB_NO (1U << 4) +-#define MSR_ARCH_CAP_MDS_NO (1U << 5) ++#define MSR_ARCH_CAP_SSB_NO (1U << 4) ++#define MSR_ARCH_CAP_MDS_NO (1U << 5) ++#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) ++#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) ++#define MSR_ARCH_CAP_TAA_NO (1U << 8) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch b/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch new file mode 100644 index 0000000..5c3c770 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch @@ -0,0 +1,125 @@ +From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:12 +0000 +Subject: [PATCH 2/9] target/i386: add a ucode-rev property + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-3-pbonzini@redhat.com> +Patchwork-id: 93909 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property +Bugzilla: 1791648 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Add the property and plumb it in TCG and HVF (the latter of which +tried to support returning a constant value but used the wrong MSR). 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 10 ++++++++++ + target/i386/cpu.h | 3 +++ + target/i386/hvf/x86_emu.c | 4 +--- + target/i386/misc_helper.c | 4 ++++ + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 863192c..e505d3e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + } + } + ++ if (cpu->ucode_rev == 0) { ++ /* The default is the same as KVM's. */ ++ if (IS_AMD_CPU(env)) { ++ cpu->ucode_rev = 0x01000065; ++ } else { ++ cpu->ucode_rev = 0x100000000ULL; ++ } ++ } ++ + /* mwait extended info: needed for Core compatibility */ + /* We always wake on interrupt even if host does not have the capability */ + cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), + DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), + DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), ++ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), + DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), + DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index cde2a16..4441061 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -348,6 +348,7 @@ typedef enum X86Seg { + #define MSR_IA32_SPEC_CTRL 0x48 + #define MSR_VIRT_SSBD 0xc001011f + #define MSR_IA32_PRED_CMD 0x49 ++#define MSR_IA32_UCODE_REV 0x8b + #define MSR_IA32_CORE_CAPABILITY 0xcf + + #define MSR_IA32_ARCH_CAPABILITIES 0x10a +@@ -1621,6 
+1622,8 @@ struct X86CPU { + CPUNegativeOffsetState neg; + CPUX86State env; + ++ uint64_t ucode_rev; ++ + uint32_t hyperv_spinlock_attempts; + char *hyperv_vendor_id; + bool hyperv_synic_kvm_only; +diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c +index 3df7672..92ab815 100644 +--- a/target/i386/hvf/x86_emu.c ++++ b/target/i386/hvf/x86_emu.c +@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) + RIP(env) += decode->len; + } + +-#define MSR_IA32_UCODE_REV 0x00000017 +- + void simulate_rdmsr(struct CPUState *cpu) + { + X86CPU *x86_cpu = X86_CPU(cpu); +@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) + val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); + break; + case MSR_IA32_UCODE_REV: +- val = (0x100000000ULL << 32) | 0x100000000ULL; ++ val = x86_cpu->ucode_rev; + break; + case MSR_EFER: + val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index 3eff688..aed16fe 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch new file mode 100644 index 0000000..a80c9d3 --- /dev/null +++ b/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch @@ -0,0 +1,72 @@ +From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 
00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:15 +0000 +Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV + as an emulated MSR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-6-pbonzini@redhat.com> +Patchwork-id: 93898 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, +which added it to the emulated MSR list, a bug caused the microcode +version to revert to 0x100000000 on INIT. As a result, processors other +than the bootstrap processor would not see the host microcode revision; +some Windows version complain loudly about this and crash with a +fairly explicit MICROCODE REVISION MISMATCH error. + +[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset + microcode version on INIT or RESET" should also be applied.] + +Reported-by: Alex Williamson +Message-id: <20200211175516.10716-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/kvm.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 6c61aef..99840ca 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -105,6 +105,7 @@ static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; ++static bool has_msr_ucode_rev; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; ++ case MSR_IA32_UCODE_REV: ++ has_msr_ucode_rev = true; ++ break; + } + } + } +@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + +- if (kvm_arch_get_supported_msr_feature(kvm_state, +- MSR_IA32_UCODE_REV)) { ++ if (has_msr_ucode_rev) { + kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch new file mode 100644 index 0000000..4c2362d --- /dev/null +++ b/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch @@ -0,0 +1,112 @@ +From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Tue, 14 Apr 2020 15:00:36 +0100 +Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution + controls + +RH-Author: Vitaly Kuznetsov +Message-id: <20200414150036.625732-2-vkuznets@redhat.com> +Patchwork-id: 94674 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls +Bugzilla: 1822682 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Paolo Bonzini + +Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for +secondary execution controls") added a workaround for 
KVM pre-dating +commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm +KVM_GET_MSRS") which wasn't setting certain available controls. The +workaround uses generic CPUID feature bits to set missing VMX controls. + +It was found that in some cases it is possible to observe hosts which +have certain CPUID features but lack the corresponding VMX control. + +In particular, it was reported that Azure VMs have RDSEED but lack +VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature +bit result in QEMU abort. + +Resolve the issue but not applying the workaround when we don't have +to. As there is no good way to find out if KVM has the fix itself, use +95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead +as these [are supposed to] come together. + +Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") +Suggested-by: Paolo Bonzini +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 99840ca..fcc8f7d 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; + static bool has_msr_ucode_rev; ++static bool has_msr_vmx_procbased_ctls2; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: +- /* KVM forgot to add these bits for some time, do this ourselves. 
*/ +- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ if (!has_msr_vmx_procbased_ctls2) { ++ /* KVM forgot to add these bits for some time, do this ourselves. */ ++ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & ++ CPUID_XSAVE_XSAVES) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & ++ CPUID_EXT_RDRAND) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_INVPCID) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_RDSEED) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & ++ CPUID_EXT2_RDTSCP) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ } + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_UCODE_REV: + has_msr_ucode_rev = true; + break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ has_msr_vmx_procbased_ctls2 = true; ++ break; + } + } + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch 
b/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch new file mode 100644 index 0000000..47438a3 --- /dev/null +++ b/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch @@ -0,0 +1,49 @@ +From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:16 +0000 +Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu + max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-7-pbonzini@redhat.com> +Patchwork-id: 93910 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +These two features were incorrectly tied to host_cpuid_required rather than +cpu->max_features. As a result, -cpu max was not enabling either MONITOR +features or ucode revision. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) + +[RHEL7: context, upstream uses g_autofree] + +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5ac843d..1685a8c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + g_free(name); + goto out; + } ++ } + ++ if (cpu->max_features && accel_uses_host_cpuid()) { + if (enable_cpu_pm) { + host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, + &cpu->mwait.ecx, &cpu->mwait.edx); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch new file mode 100644 index 0000000..c7ced8a --- /dev/null +++ b/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch @@ -0,0 +1,73 @@ +From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:14 +0000 +Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-5-pbonzini@redhat.com> +Patchwork-id: 93904 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +This was a very interesting semantic conflict that caused git to move +the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but +still should be fixed... + +Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) +Message-id: <20200206171022.9289-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/misc_helper.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index aed16fe..7d61221 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { +- X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; +- case MSR_IA32_UCODE_REV: +- val = x86_cpu->ucode_rev; +- break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) + + void helper_rdmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); +@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) + case MSR_IA32_BNDCFGS: + val = env->msr_bndcfgs; + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch new file mode 100644 index 0000000..5118aed --- /dev/null +++ b/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch @@ -0,0 +1,178 @@ +From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:11 +0000 +Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: 
<20200217162316.2464-2-pbonzini@redhat.com> +Patchwork-id: 93899 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Some read-only MSRs affect the behavior of ioctls such as +KVM_SET_NESTED_STATE. We can initialize them once and for all +right after the CPU is realized, since they will never be modified +by the guest. + +Reported-by: Qingua Cheng +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- + target/i386/kvm_i386.h | 1 + + 2 files changed, 49 insertions(+), 33 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 86d9a1f..f41605b 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -67,6 +67,8 @@ + * 255 kvm_msr_entry structs */ + #define MSR_BUF_SIZE 4096 + ++static void kvm_init_msrs(X86CPU *cpu); ++ + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), +@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + has_msr_tsc_aux = false; + } + ++ kvm_init_msrs(cpu); ++ + r = hyperv_init_vcpu(cpu); + if (r) { + goto fail; +@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + VMCS12_MAX_FIELD_INDEX << 1); + } + ++static int kvm_buf_set_msrs(X86CPU *cpu) ++{ ++ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (ret < cpu->kvm_msr_buf->nmsrs) { ++ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; ++ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, ++ (uint32_t)e->index, 
(uint64_t)e->data); ++ } ++ ++ assert(ret == cpu->kvm_msr_buf->nmsrs); ++ return 0; ++} ++ ++static void kvm_init_msrs(X86CPU *cpu) ++{ ++ CPUX86State *env = &cpu->env; ++ ++ kvm_msr_buf_reset(cpu); ++ if (has_msr_arch_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); ++ } ++ ++ if (has_msr_core_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, ++ env->features[FEAT_CORE_CAPABILITY]); ++ } ++ ++ /* ++ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but ++ * all kernels with MSR features should have them. ++ */ ++ if (kvm_feature_msrs && cpu_has_vmx(env)) { ++ kvm_msr_entry_add_vmx(cpu, env->features); ++ } ++ ++ assert(kvm_buf_set_msrs(cpu) == 0); ++} ++ + static int kvm_put_msrs(X86CPU *cpu, int level) + { + CPUX86State *env = &cpu->env; + int i; +- int ret; + + kvm_msr_buf_reset(cpu); + +@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + #endif + +- /* If host supports feature MSR, write down. */ +- if (has_msr_arch_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, +- env->features[FEAT_ARCH_CAPABILITIES]); +- } +- +- if (has_msr_core_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, +- env->features[FEAT_CORE_CAPABILITY]); +- } +- + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. +@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ +- +- /* +- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but +- * all kernels with MSR features should have them. 
+- */ +- if (kvm_feature_msrs && cpu_has_vmx(env)) { +- kvm_msr_entry_add_vmx(cpu, env->features); +- } + } + + if (env->mcg_cap) { +@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +- if (ret < 0) { +- return ret; +- } +- +- if (ret < cpu->kvm_msr_buf->nmsrs) { +- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; +- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, +- (uint32_t)e->index, (uint64_t)e->data); +- } +- +- assert(ret == cpu->kvm_msr_buf->nmsrs); +- return 0; ++ return kvm_buf_set_msrs(cpu); + } + + +diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h +index 06fe06b..d98c6f6 100644 +--- a/target/i386/kvm_i386.h ++++ b/target/i386/kvm_i386.h +@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); + bool kvm_has_x2apic_api(void); + + bool kvm_hv_vpindex_settable(void); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch new file mode 100644 index 0000000..99b18fc --- /dev/null +++ b/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch @@ -0,0 +1,64 @@ +From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:13 +0000 +Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-4-pbonzini@redhat.com> +Patchwork-id: 93897 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +KVM can return the host microcode revision as a feature MSR. +Use it as the default value for -cpu host. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 4 ++++ + target/i386/kvm.c | 5 +++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index e505d3e..5ac843d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + &cpu->mwait.ecx, &cpu->mwait.edx); + env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; + } ++ if (kvm_enabled() && cpu->ucode_rev == 0) { ++ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV); ++ } + } + + if (cpu->ucode_rev == 0) { +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index f41605b..6c61aef 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + ++ if (kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV)) { ++ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); ++ } ++ + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. 
+-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch b/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch new file mode 100644 index 0000000..49e54ba --- /dev/null +++ b/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch @@ -0,0 +1,69 @@ +From 72a1827006be22791017ff2b671eac1c96be5d12 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 7 May 2020 22:09:23 +0100 +Subject: [PATCH 01/26] target/i386: set the CPUID level to 0x14 on old + machine-type + +RH-Author: plai@redhat.com +Message-id: <20200507220923.13723-1-plai@redhat.com> +Patchwork-id: 96347 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH RESEND] target/i386: set the CPUID level to 0x14 on old machine-type +Bugzilla: 1513681 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Danilo de Paula + +From: Luwei Kang + +BZ https://bugzilla.redhat.com/show_bug.cgi?id=1513681 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28146304 +Branch: rhel-av-8.2.1 + +Tested on intel-icelake-y-01.ml3.eng.bos.redhat.com. + +The CPUID level need to be set to 0x14 manually on old +machine-type if Intel PT is enabled in guest. E.g. the +CPUID[0].EAX(level)=7 and CPUID[7].EBX[25](intel-pt)=1 when the +Qemu with "-machine pc-i440fx-3.1 -cpu qemu64,+intel-pt" parameter. + +Some Intel PT capabilities are exposed by leaf 0x14 and the +missing capabilities will cause some MSRs access failed. +This patch add a warning message to inform the user to extend +the CPUID level. + +Suggested-by: Eduardo Habkost +Signed-off-by: Luwei Kang +Message-Id: <1584031686-16444-1-git-send-email-luwei.kang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit ddc2fc9e4e42ebce48b088963dc7fbd1c08d5f33) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1685a8c..0f0a2db 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6206,9 +6206,14 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); + + /* Intel Processor Trace requires CPUID[0x14] */ +- if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && +- kvm_enabled() && cpu->intel_pt_auto_level) { +- x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); ++ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT)) { ++ if (cpu->intel_pt_auto_level) { ++ x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); ++ } else if (cpu->env.cpuid_min_level < 0x14) { ++ mark_unavailable_features(cpu, FEAT_7_0_EBX, ++ CPUID_7_0_EBX_INTEL_PT, ++ "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); ++ } + } + + /* CPU topology with multi-dies support requires CPUID[0x1F] */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch b/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch new file mode 100644 index 0000000..60abc1b --- /dev/null +++ b/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch @@ -0,0 +1,56 @@ +From 9adf5e57df32df464e7465b1df72c993d0ed4ed4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 Jul 2020 18:08:35 -0400 +Subject: [PATCH 3/4] target/i386: sev: fail query-sev-capabilities if QEMU + cannot use SEV +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200731180835.86786-3-pbonzini@redhat.com> +Patchwork-id: 98124 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/2] target/i386: sev: fail query-sev-capabilities if QEMU cannot use SEV +Bugzilla: 1689341 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé 
+RH-Acked-by: Dr. David Alan Gilbert + +In some cases, such as if the kvm-amd "sev" module parameter is set +to 0, SEV will be unavailable but query-sev-capabilities will still +return all the information. This tricks libvirt into erroneously +reporting that SEV is available. Check the actual usability of the +feature and return the appropriate error if QEMU cannot use KVM +or KVM cannot use SEV. + +Reviewed-by: Eric Blake +Signed-off-by: Paolo Bonzini +cherry picked from commit 1b38750c40281dd0d068f8536b2ea95d7b9bd585 +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/sev.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 054f2d846a..a47f0d3880 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -504,6 +504,15 @@ sev_get_capabilities(Error **errp) + uint32_t ebx; + int fd; + ++ if (!kvm_enabled()) { ++ error_setg(errp, "KVM not enabled"); ++ return NULL; ++ } ++ if (kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, NULL) < 0) { ++ error_setg(errp, "SEV is not enabled in KVM"); ++ return NULL; ++ } ++ + fd = open(DEFAULT_SEV_DEVICE, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "Failed to open %s", +-- +2.27.0 + diff --git a/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch b/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch new file mode 100644 index 0000000..e5f3459 --- /dev/null +++ b/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch @@ -0,0 +1,142 @@ +From 8789f2662c6ddacc5472a803d253b94d93c6e9f0 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 Jul 2020 18:08:34 -0400 +Subject: [PATCH 2/4] target/i386: sev: provide proper error reporting for + query-sev-capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200731180835.86786-2-pbonzini@redhat.com> +Patchwork-id: 98123 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/2] 
target/i386: sev: provide proper error reporting for query-sev-capabilities +Bugzilla: 1689341 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Dr. David Alan Gilbert + +The query-sev-capabilities was reporting errors through error_report; +change it to use Error** so that the cause of the failure is clearer. + +Reviewed-by: Eric Blake +Signed-off-by: Paolo Bonzini +Cherry picked from commit e4f6278557148151e77260b872b41bcd7ceb4737 +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/monitor.c | 10 +--------- + target/i386/sev-stub.c | 3 ++- + target/i386/sev.c | 18 +++++++++--------- + target/i386/sev_i386.h | 2 +- + 4 files changed, 13 insertions(+), 20 deletions(-) + +diff --git a/target/i386/monitor.c b/target/i386/monitor.c +index 9fb4d641d5..cfd8075e4f 100644 +--- a/target/i386/monitor.c ++++ b/target/i386/monitor.c +@@ -727,13 +727,5 @@ SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) + + SevCapability *qmp_query_sev_capabilities(Error **errp) + { +- SevCapability *data; +- +- data = sev_get_capabilities(); +- if (!data) { +- error_setg(errp, "SEV feature is not available"); +- return NULL; +- } +- +- return data; ++ return sev_get_capabilities(errp); + } +diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c +index e5ee13309c..88e3f39a1e 100644 +--- a/target/i386/sev-stub.c ++++ b/target/i386/sev-stub.c +@@ -44,7 +44,8 @@ char *sev_get_launch_measurement(void) + return NULL; + } + +-SevCapability *sev_get_capabilities(void) ++SevCapability *sev_get_capabilities(Error **errp) + { ++ error_setg(errp, "SEV is not available in this QEMU"); + return NULL; + } +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 024bb24e51..054f2d846a 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -453,7 +453,7 @@ sev_get_info(void) + + static int + sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, +- size_t *cert_chain_len) ++ size_t *cert_chain_len, Error **errp) + { + 
guchar *pdh_data = NULL; + guchar *cert_chain_data = NULL; +@@ -464,8 +464,8 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, + r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); + if (r < 0) { + if (err != SEV_RET_INVALID_LEN) { +- error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", +- r, err, fw_error_to_str(err)); ++ error_setg(errp, "failed to export PDH cert ret=%d fw_err=%d (%s)", ++ r, err, fw_error_to_str(err)); + return 1; + } + } +@@ -477,8 +477,8 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, + + r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); + if (r < 0) { +- error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", +- r, err, fw_error_to_str(err)); ++ error_setg(errp, "failed to export PDH cert ret=%d fw_err=%d (%s)", ++ r, err, fw_error_to_str(err)); + goto e_free; + } + +@@ -495,7 +495,7 @@ e_free: + } + + SevCapability * +-sev_get_capabilities(void) ++sev_get_capabilities(Error **errp) + { + SevCapability *cap = NULL; + guchar *pdh_data = NULL; +@@ -506,13 +506,13 @@ sev_get_capabilities(void) + + fd = open(DEFAULT_SEV_DEVICE, O_RDWR); + if (fd < 0) { +- error_report("%s: Failed to open %s '%s'", __func__, +- DEFAULT_SEV_DEVICE, strerror(errno)); ++ error_setg_errno(errp, errno, "Failed to open %s", ++ DEFAULT_SEV_DEVICE); + return NULL; + } + + if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, +- &cert_chain_data, &cert_chain_len)) { ++ &cert_chain_data, &cert_chain_len, errp)) { + goto out; + } + +diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h +index 8ada9d385d..1e073342ba 100644 +--- a/target/i386/sev_i386.h ++++ b/target/i386/sev_i386.h +@@ -38,7 +38,7 @@ extern SevInfo *sev_get_info(void); + extern uint32_t sev_get_cbit_position(void); + extern uint32_t sev_get_reduced_phys_bits(void); + extern char *sev_get_launch_measurement(void); +-extern SevCapability *sev_get_capabilities(void); ++extern SevCapability 
*sev_get_capabilities(Error **errp); + + typedef struct QSevGuestInfo QSevGuestInfo; + typedef struct QSevGuestInfoClass QSevGuestInfoClass; +-- +2.27.0 + diff --git a/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch b/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch new file mode 100644 index 0000000..38e5637 --- /dev/null +++ b/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch @@ -0,0 +1,60 @@ +From c4fe37ae6d75ed72e6a3bde01fea053eb508274c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Jun 2020 07:41:11 -0400 +Subject: [PATCH 41/42] target/s390x/kvm: Enable adapter interruption + suppression again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20200605074111.2185-4-thuth@redhat.com> +Patchwork-id: 97370 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/3] target/s390x/kvm: Enable adapter interruption suppression again +Bugzilla: 1756946 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +The AIS feature has been disabled late in the v2.10 development cycle since +there were some issues with migration (see commit 3f2d07b3b01ea61126b - +"s390x/ais: for 2.10 stable: disable ais facility"). We originally wanted +to enable it again for newer machine types, but apparently we forgot to do +this so far. Let's do it now for the machines that support proper CPU models. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1756946 +Signed-off-by: Thomas Huth +Message-Id: <20200122101437.5069-1-thuth@redhat.com> +Reviewed-by: David Hildenbrand +Tested-by: Matthew Rosato +Signed-off-by: Cornelia Huck +(cherry picked from commit a5c8617af6919515b84256978452edf07401c45e) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/s390x/kvm.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index c589ef9034..0bbf8f81b0 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -377,10 +377,13 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + /* + * The migration interface for ais was introduced with kernel 4.13 + * but the capability itself had been active since 4.12. As migration +- * support is considered necessary let's disable ais in the 2.10 +- * machine. ++ * support is considered necessary, we only try to enable this for ++ * newer machine types if KVM_CAP_S390_AIS_MIGRATION is available. + */ +- /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */ ++ if (cpu_model_allowed() && kvm_kernel_irqchip_allowed() && ++ kvm_check_extension(s, KVM_CAP_S390_AIS_MIGRATION)) { ++ kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); ++ } + + kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES); + return 0; +-- +2.27.0 + diff --git a/SOURCES/kvm-tcp_emu-Fix-oob-access.patch b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch new file mode 100644 index 0000000..e532877 --- /dev/null +++ b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch @@ -0,0 +1,59 @@ +From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:56 +0100 +Subject: [PATCH 1/4] tcp_emu: Fix oob access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> +Patchwork-id: 93399 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Samuel Thibault + +The main loop only checks for one available byte, while we sometimes +need two bytes. 
+ +[ MA - minor conflict, CHANGELOG.md absent ] +(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index d6dd133..cbecd64 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + break; + + case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of +@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + /* This is the field containing the port + * number that RA-player is listening to. + */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch new file mode 100644 index 0000000..846da73 --- /dev/null +++ b/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch @@ -0,0 +1,149 @@ +From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:49 +0000 +Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-3-jmaloy@redhat.com> +Patchwork-id: 93826 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() assume that snprintf() returns "only" the +number of 
bytes written (excluding terminating NUL). + +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Before patch ce131029, if there isn't enough room in "m_data" for the +"DCC ..." message, we overflow "m_data". + +After the patch, if there isn't enough room for the same, we don't +overflow "m_data", but we set "m_len" out-of-bounds. The next time an +access is bounded by "m_len", we'll have a buffer overflow then. + +Use slirp_fmt*() to fix potential OOB memory access. + +Reported-by: Laszlo Ersek +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- + 1 file changed, 21 insertions(+), 23 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index 954d1a6..26d4ead 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + NTOHS(n1); + NTOHS(n2); + m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); +- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); +- assert(m->m_len < M_ROOM(m)); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); + } else { + *eol = '\r'; + } +@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, +- n5, n6, x == 7 ? 
buff : ""); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + return 1; + } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { + /* +@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", +- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); +- ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + return 1; + } + +@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = snprintf(m->m_data, M_ROOM(m), +- "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); + return 1; + + case EMU_IRC: +@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC CHAT chat %lu %u%c\n", +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC SEND %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); 
++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC MOVE %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); + } + return 1; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch new file mode 100644 index 0000000..e8a48bf --- /dev/null +++ b/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch @@ -0,0 +1,106 @@ +From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:12 +0000 +Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-4-drjones@redhat.com> +Patchwork-id: 93626 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + tests/arm-cpu-features: Check feature default values + + If we know what the default value should be then we can test for + that as well as the 
feature existence. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-5-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) +Signed-off-by: Danilo C. L. de Paula +--- + tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 6e99aa9..89285ca 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) + qobject_unref(_resp); \ + }) + ++#define assert_feature(qts, cpu_type, feature, expected_value) \ ++({ \ ++ QDict *_resp, *_props; \ ++ \ ++ _resp = do_query_no_props(qts, cpu_type); \ ++ g_assert(_resp); \ ++ g_assert(resp_has_props(_resp)); \ ++ _props = resp_get_props(_resp); \ ++ g_assert(qdict_get(_props, feature)); \ ++ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ ++ qobject_unref(_resp); \ ++}) ++ ++#define assert_has_feature_enabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, true) ++ ++#define assert_has_feature_disabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, false) ++ + static void assert_type_full(QTestState *qts) + { + const char *error; +@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ +- assert_has_feature(qts, "max", "pmu"); +- assert_has_feature(qts, "cortex-a15", "pmu"); ++ assert_has_feature_enabled(qts, "max", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + + if (g_str_equal(qtest_get_arch(), "aarch64")) { +- assert_has_feature(qts, "max", "aarch64"); +- assert_has_feature(qts, "max", 
"sve"); +- assert_has_feature(qts, "max", "sve128"); +- assert_has_feature(qts, "cortex-a57", "pmu"); +- assert_has_feature(qts, "cortex-a57", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "sve"); ++ assert_has_feature_enabled(qts, "max", "sve128"); ++ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + + sve_tests_default(qts, "max"); + +@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + +- assert_has_feature(qts, "host", "aarch64"); +- assert_has_feature(qts, "host", "pmu"); ++ assert_has_feature_enabled(qts, "host", "aarch64"); ++ assert_has_feature_enabled(qts, "host", "pmu"); + + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch b/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch new file mode 100644 index 0000000..12df637 --- /dev/null +++ b/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch @@ -0,0 +1,127 @@ +From 6d549629becb69f315dd4213f730122d19c9c566 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:54 +0100 +Subject: [PATCH 11/12] tests/bios-tables-test: add test cases for ACPI HMAT + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-11-plai@redhat.com> +Patchwork-id: 96739 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 10/11] tests/bios-tables-test: add test cases for ACPI HMAT +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Tao Xu + +ACPI table HMAT has been introduced, QEMU now builds HMAT tables for +Heterogeneous Memory with boot option '-numa node'. + +Add test cases on PC and Q35 machines with 2 numa nodes. 
+Because HMAT is generated when system enable numa, the +following tables need to be added for this test: + tests/data/acpi/pc/APIC.acpihmat + tests/data/acpi/pc/SRAT.acpihmat + tests/data/acpi/pc/HMAT.acpihmat + tests/data/acpi/pc/DSDT.acpihmat + tests/data/acpi/q35/APIC.acpihmat + tests/data/acpi/q35/SRAT.acpihmat + tests/data/acpi/q35/HMAT.acpihmat + tests/data/acpi/q35/DSDT.acpihmat + +Acked-by: Markus Armbruster +Reviewed-by: Igor Mammedov +Reviewed-by: Daniel Black +Reviewed-by: Jingqi Liu +Suggested-by: Igor Mammedov +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-9-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1c8f85d93d261dc555a0aad6f54f2b5e8009d859) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + tests/bios-tables-test-allowed-diff.h | 8 +++++++ + tests/bios-tables-test.c | 44 +++++++++++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+) + +diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h +index dfb8523..3c9e0c9 100644 +--- a/tests/bios-tables-test-allowed-diff.h ++++ b/tests/bios-tables-test-allowed-diff.h +@@ -1 +1,9 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/pc/APIC.acpihmat", ++"tests/data/acpi/pc/SRAT.acpihmat", ++"tests/data/acpi/pc/HMAT.acpihmat", ++"tests/data/acpi/pc/DSDT.acpihmat", ++"tests/data/acpi/q35/APIC.acpihmat", ++"tests/data/acpi/q35/SRAT.acpihmat", ++"tests/data/acpi/q35/HMAT.acpihmat", ++"tests/data/acpi/q35/DSDT.acpihmat", +diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c +index 79f5da0..9823820 100644 +--- a/tests/bios-tables-test.c ++++ b/tests/bios-tables-test.c +@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void) + + } + ++static void test_acpi_tcg_acpi_hmat(const char *machine) ++{ ++ test_data data; ++ ++ memset(&data, 0, sizeof(data)); ++ data.machine = machine; ++ data.variant = ".acpihmat"; ++ 
test_acpi_one(" -machine hmat=on" ++ " -smp 2,sockets=2" ++ " -m 128M,slots=2,maxmem=1G" ++ " -object memory-backend-ram,size=64M,id=m0" ++ " -object memory-backend-ram,size=64M,id=m1" ++ " -numa node,nodeid=0,memdev=m0" ++ " -numa node,nodeid=1,memdev=m1,initiator=0" ++ " -numa cpu,node-id=0,socket-id=0" ++ " -numa cpu,node-id=0,socket-id=1" ++ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory," ++ "data-type=access-latency,latency=1" ++ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory," ++ "data-type=access-bandwidth,bandwidth=65534M" ++ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory," ++ "data-type=access-latency,latency=65534" ++ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory," ++ "data-type=access-bandwidth,bandwidth=32767M" ++ " -numa hmat-cache,node-id=0,size=10K,level=1," ++ "associativity=direct,policy=write-back,line=8" ++ " -numa hmat-cache,node-id=1,size=10K,level=1," ++ "associativity=direct,policy=write-back,line=8", ++ &data); ++ free_test_data(&data); ++} ++ ++static void test_acpi_q35_tcg_acpi_hmat(void) ++{ ++ test_acpi_tcg_acpi_hmat(MACHINE_Q35); ++} ++ ++static void test_acpi_piix4_tcg_acpi_hmat(void) ++{ ++ test_acpi_tcg_acpi_hmat(MACHINE_PC); ++} ++ + static void test_acpi_virt_tcg(void) + { + test_data data = { +@@ -991,6 +1033,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); + qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm); + qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); ++ qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat); ++ qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat); + } else if (strcmp(arch, "aarch64") == 0) { + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch 
b/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch new file mode 100644 index 0000000..240c408 --- /dev/null +++ b/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch @@ -0,0 +1,60 @@ +From f73b18e03c6758500bf367b1575205772d1f878f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:53:52 -0400 +Subject: [PATCH 10/42] tests/boot-sector: Fix the bad s390x assembler code + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-11-thuth@redhat.com> +Patchwork-id: 97031 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 10/38] tests/boot-sector: Fix the bad s390x assembler code +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +There are currently two bugs in s390x_code[]: First, the initial jump +uses the wrong offset, so it was jumping to 0x10014 instead of 0x10010. +Second, LHI only loads the lower 32 bits of the register. + +Everything worked fine as long as the s390-ccw bios code was jumping +here with r3 containing zeroes in the uppermost 48 bits - which just +happened to be the case so far by accident. But we cannot rely on this +fact, and indeed one of the recently suggested patches to jump2ipl.c causes +the newer GCCs to put different values into r3. In that case the code +from s390x_code[] crashes very ungracefully. + +Thus let's make sure to jump to the right instruction, and use LGHI +instead of LHI to make sure that we always zero out the upper bits +of the register. + +Signed-off-by: Thomas Huth +Message-Id: <20191217150642.27946-1-thuth@redhat.com> +Reviewed-by: Christian Borntraeger +Signed-off-by: Cornelia Huck +(cherry picked from commit 5afec76fbe2c07d03fd8c9ac525140059499637a) +Signed-off-by: Danilo C. L.
de Paula +--- + tests/boot-sector.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/boot-sector.c b/tests/boot-sector.c +index 7824286b9a..9e66c6d013 100644 +--- a/tests/boot-sector.c ++++ b/tests/boot-sector.c +@@ -75,11 +75,11 @@ static const uint8_t s390x_psw_and_magic[] = { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 /* in the s390-ccw bios */ + }; + static const uint8_t s390x_code[] = { +- 0xa7, 0xf4, 0x00, 0x0a, /* j 0x10010 */ ++ 0xa7, 0xf4, 0x00, 0x08, /* j 0x10010 */ + 0x00, 0x00, 0x00, 0x00, + 'S', '3', '9', '0', + 'E', 'P', 0x00, 0x01, +- 0xa7, 0x38, HIGH(SIGNATURE_ADDR), LOW(SIGNATURE_ADDR), /* lhi r3,0x7c10 */ ++ 0xa7, 0x39, HIGH(SIGNATURE_ADDR), LOW(SIGNATURE_ADDR), /* lghi r3,0x7c10 */ + 0xa7, 0x48, LOW(SIGNATURE), HIGH(SIGNATURE), /* lhi r4,0xadde */ + 0x40, 0x40, 0x30, 0x00, /* sth r4,0(r3) */ + 0xa7, 0xf4, 0xff, 0xfa /* j 0x10010 */ +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch b/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch new file mode 100644 index 0000000..41ee71c --- /dev/null +++ b/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch @@ -0,0 +1,266 @@ +From 0f11aae02dcabd3a5ee0b5946aec39da6dddea52 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 May 2020 23:56:53 +0100 +Subject: [PATCH 10/12] tests/numa: Add case for QMP build HMAT + +RH-Author: plai@redhat.com +Message-id: <20200521235655.27141-10-plai@redhat.com> +Patchwork-id: 96735 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 09/11] tests/numa: Add case for QMP build HMAT +Bugzilla: 1600217 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +From: Tao Xu + +Check configuring HMAT usecase + +Acked-by: Markus Armbruster +Suggested-by: Igor Mammedov +Signed-off-by: Tao Xu +Message-Id: <20191213011929.2520-8-tao3.xu@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Igor Mammedov +(cherry picked from commit d00817c944ed15fbe4a61d44fe7f9fe166c7df88) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula +--- + tests/numa-test.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 213 insertions(+) + +diff --git a/tests/numa-test.c b/tests/numa-test.c +index 8de8581..17dd807 100644 +--- a/tests/numa-test.c ++++ b/tests/numa-test.c +@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data) + qtest_quit(qs); + } + ++static void pc_hmat_build_cfg(const void *data) ++{ ++ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on " ++ "-smp 2,sockets=2 " ++ "-m 128M,slots=2,maxmem=1G " ++ "-object memory-backend-ram,size=64M,id=m0 " ++ "-object memory-backend-ram,size=64M,id=m1 " ++ "-numa node,nodeid=0,memdev=m0 " ++ "-numa node,nodeid=1,memdev=m1,initiator=0 " ++ "-numa cpu,node-id=0,socket-id=0 " ++ "-numa cpu,node-id=0,socket-id=1", ++ data ? (char *)data : ""); ++ ++ /* Fail: Initiator should be less than the number of nodes */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); ++ ++ /* Fail: Target should be less than the number of nodes */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); ++ ++ /* Fail: Initiator should contain cpu */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); ++ ++ /* Fail: Data-type mismatch */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 
'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"write-latency\"," ++ " 'bandwidth': 524288000 } }"))); ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\"," ++ " 'latency': 5 } }"))); ++ ++ /* Fail: Bandwidth should be 1MB (1048576) aligned */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," ++ " 'bandwidth': 1048575 } }"))); ++ ++ /* Configuring HMAT bandwidth and latency details */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 1 } }"))); /* 1 ns */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 5 } }"))); /* Fail: Duplicate configuration */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," ++ " 'bandwidth': 68717379584 } }"))); /* 65534 MB/s */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 65534 } }"))); /* 65534 ns */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," ++ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," ++ " 
'bandwidth': 34358689792 } }"))); /* 32767 MB/s */ ++ ++ /* Fail: node_id should be less than the number of nodes */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 2, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ ++ /* Fail: level should be less than HMAT_LB_LEVELS (4) */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 4, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ ++ /* Fail: associativity option should be 'none', if level is 0 */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 0, 'associativity': \"direct\", 'policy': \"none\"," ++ " 'line': 0 } }"))); ++ /* Fail: policy option should be 'none', if level is 0 */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 0, 'associativity': \"none\", 'policy': \"write-back\"," ++ " 'line': 0 } }"))); ++ /* Fail: line option should be 0, if level is 0 */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 0, 'associativity': \"none\", 'policy': \"none\"," ++ " 'line': 8 } }"))); ++ ++ /* Configuring HMAT memory side cache attributes */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 
'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); /* Fail: Duplicate configuration */ ++ /* Fail: The size of level 2 size should be small than level 1 */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 2, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ /* Fail: The size of level 0 size should be larger than level 1 */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 0, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 1, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ ++ /* let machine initialization to complete and run */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, ++ "{ 'execute': 'x-exit-preconfig' }"))); ++ qtest_qmp_eventwait(qs, "RESUME"); ++ ++ qtest_quit(qs); ++} ++ ++static void pc_hmat_off_cfg(const void *data) ++{ ++ QTestState *qs = qtest_initf("%s -nodefaults --preconfig " ++ "-smp 2,sockets=2 " ++ "-m 128M,slots=2,maxmem=1G " ++ "-object memory-backend-ram,size=64M,id=m0 " ++ "-object memory-backend-ram,size=64M,id=m1 " ++ "-numa node,nodeid=0,memdev=m0", ++ data ? 
(char *)data : ""); ++ ++ /* ++ * Fail: Enable HMAT with -machine hmat=on ++ * before using any of hmat specific options ++ */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\"," ++ " 'initiator': 0 } }"))); ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\" } }"))); ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 1 } }"))); ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ ++ /* let machine initialization to complete and run */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, ++ "{ 'execute': 'x-exit-preconfig' }"))); ++ qtest_qmp_eventwait(qs, "RESUME"); ++ ++ qtest_quit(qs); ++} ++ ++static void pc_hmat_erange_cfg(const void *data) ++{ ++ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on " ++ "-smp 2,sockets=2 " ++ "-m 128M,slots=2,maxmem=1G " ++ "-object memory-backend-ram,size=64M,id=m0 " ++ "-object memory-backend-ram,size=64M,id=m1 " ++ "-numa node,nodeid=0,memdev=m0 " ++ "-numa node,nodeid=1,memdev=m1,initiator=0 " ++ "-numa cpu,node-id=0,socket-id=0 " ++ "-numa cpu,node-id=0,socket-id=1", ++ data ? 
(char *)data : ""); ++ ++ /* Can't store the compressed latency */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 1 } }"))); /* 1 ns */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," ++ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," ++ " 'latency': 65535 } }"))); /* 65535 ns */ ++ ++ /* Test the 0 input (bandwidth not provided) */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," ++ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," ++ " 'bandwidth': 0 } }"))); /* 0 MB/s */ ++ /* Fail: bandwidth should be provided before memory side cache attributes */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," ++ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," ++ " 'line': 8 } }"))); ++ ++ /* Can't store the compressed bandwidth */ ++ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," ++ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," ++ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," ++ " 'bandwidth': 68718428160 } }"))); /* 65535 MB/s */ ++ ++ /* let machine initialization to complete and run */ ++ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, ++ "{ 'execute': 'x-exit-preconfig' }"))); ++ qtest_qmp_eventwait(qs, "RESUME"); ++ ++ qtest_quit(qs); ++} ++ + int main(int argc, char **argv) + { + const char *args = NULL; +@@ -346,6 +556,9 @@ int main(int argc, char **argv) + if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { + qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); + 
qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); ++ qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg); ++ qtest_add_data_func("/numa/pc/hmat/off", args, pc_hmat_off_cfg); ++ qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg); + } + + if (!strcmp(arch, "ppc64")) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch new file mode 100644 index 0000000..3efef47 --- /dev/null +++ b/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch @@ -0,0 +1,55 @@ +From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:12 +0000 +Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix + fuse_out_header::error value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-6-dgilbert@redhat.com> +Patchwork-id: 94128 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Philippe Mathieu-Daudé + +Fix warning reported by Clang static code analyzer: + + CC tools/virtiofsd/fuse_lowlevel.o + tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read + error = -ERANGE; + ^ ~~~~~~~ + +Fixes: 3db2876 +Reported-by: Clang Static Analyzer +Reviewed-by: Ján Tomko +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 704c036..2dd36ec 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; ++ out.error = -ERANGE; + } + + iov[0].iov_base = &out; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch b/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch new file mode 100644 index 0000000..6af549a --- /dev/null +++ b/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch @@ -0,0 +1,56 @@ +From 8ce8ccc2a22798a89bac06a37427c3a3cea91a62 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:54 +0100 +Subject: [PATCH 3/9] tools/virtiofsd/passthrough_ll: Fix double close() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200505163600.22956-2-dgilbert@redhat.com> +Patchwork-id: 96269 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/7] tools/virtiofsd/passthrough_ll: Fix double close() +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Michael S. Tsirkin + +From: Philippe Mathieu-Daudé + +On success, the fdopendir() call closes fd. Later on the error +path we try to close an already-closed fd. This can lead to +use-after-free. Fix by only closing the fd if the fdopendir() +call failed. 
+ +Cc: qemu-stable@nongnu.org +Fixes: b39bce121b (add dirp_map to hide lo_dirp pointers) +Reported-by: Coverity (CID 1421933 USE_AFTER_FREE) +Suggested-by: Peter Maydell +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20200321120654.7985-1-philmd@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e1cd92d95cd4f97b3464c4e08cd5b22bf5ca05cb) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9cba3f1..50ff672 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1524,8 +1524,7 @@ out_err: + if (d) { + if (d->dp) { + closedir(d->dp); +- } +- if (fd != -1) { ++ } else if (fd != -1) { + close(fd); + } + free(d); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch b/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch new file mode 100644 index 0000000..32c971d --- /dev/null +++ b/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch @@ -0,0 +1,58 @@ +From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 30 Jan 2020 16:01:10 +0000 +Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> +Patchwork-id: 93600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM +Bugzilla: 1787444 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +post-copy migration fails on destination with error such as: +2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: +Unaligned start address: 0x559d2afae9a0 + +Use qemu_memalign() to constrain the PPI RAM memory alignment. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Berger +Signed-off-by: Stefan Berger +Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 + +(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + hw/tpm/tpm_ppi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c +index ff31459..6d9c1a3 100644 +--- a/hw/tpm/tpm_ppi.c ++++ b/hw/tpm/tpm_ppi.c +@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) + void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, + hwaddr addr, Object *obj) + { +- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); ++ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, ++ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", + TPM_PPI_ADDR_SIZE, tpmppi->buf); + vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch new file mode 100644 index 0000000..c49aecd --- /dev/null +++ b/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch @@ -0,0 +1,82 @@ +From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 16 Jan 2020 17:52:48 +0000 +Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20200116175248.286556-2-stefanha@redhat.com> +Patchwork-id: 93365 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 +Bugzilla: 
1787395 +RH-Acked-by: John Snow +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Dr. David Alan Gilbert + +qemu-trace-stap does not support Python 3 yet: + + $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 + Traceback (most recent call last): + File "scripts/qemu-trace-stap", line 175, in + main() + File "scripts/qemu-trace-stap", line 171, in main + args.func(args) + File "scripts/qemu-trace-stap", line 118, in cmd_list + print_probes(args.verbose, "*") + File "scripts/qemu-trace-stap", line 114, in print_probes + if line.startswith(prefix): + TypeError: startswith first arg must be bytes or a tuple of bytes, not str + +Now that QEMU requires Python 3.5 or later we can switch to pure Python +3. Use Popen()'s universal_newlines=True argument to treat stdout as +text instead of binary. + +Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-id: 20200107112438.383958-1-stefanha@redhat.com +Message-Id: <20200107112438.383958-1-stefanha@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + scripts/qemu-trace-stap | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap +index 91d1051..90527eb 100755 +--- a/scripts/qemu-trace-stap ++++ b/scripts/qemu-trace-stap +@@ -1,4 +1,4 @@ +-#!/usr/bin/python ++#!/usr/bin/env python3 + # -*- python -*- + # + # Copyright (C) 2019 Red Hat, Inc +@@ -18,8 +18,6 @@ + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . 
+ +-from __future__ import print_function +- + import argparse + import copy + import os.path +@@ -104,7 +102,9 @@ def cmd_list(args): + if verbose: + print("Listing probes with name '%s'" % script) + proc = subprocess.Popen(["stap", "-l", script], +- stdout=subprocess.PIPE, env=tapset_env(tapsets)) ++ stdout=subprocess.PIPE, ++ universal_newlines=True, ++ env=tapset_env(tapsets)) + out, err = proc.communicate() + if proc.returncode != 0: + print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch b/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch new file mode 100644 index 0000000..5e63299 --- /dev/null +++ b/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch @@ -0,0 +1,102 @@ +From feb16ff29a13a4286389bb8b9d4f541aab9b84f1 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 3 Sep 2020 15:27:13 -0400 +Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364) + +RH-Author: Jon Maloy +Message-id: <20200903152713.1420531-2-jmaloy@redhat.com> +Patchwork-id: 98271 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] usb: fix setup_len init (CVE-2020-14364) +Bugzilla: 1869710 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann + +From: Gerd Hoffmann + +Store calculated setup_len in a local variable, verify it, and only +write it to the struct (USBDevice->setup_len) in case it passed the +sanity checks. + +This prevents other code (do_token_{in,out} functions specifically) +from working with invalid USBDevice->setup_len values and overrunning +the USBDevice->setup_buf[] buffer. + +Fixes: CVE-2020-14364 +Signed-off-by: Gerd Hoffmann +Tested-by: Gonglei +Reviewed-by: Li Qiang +Message-id: 20200825053636.29648-1-kraxel@redhat.com +(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/core.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 5abd128b6b..5234dcc73f 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + static void do_token_setup(USBDevice *s, USBPacket *p) + { + int request, value, index; ++ unsigned int setup_len; + + if (p->iov.size != 8) { + p->status = USB_RET_STALL; +@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) + usb_packet_copy(p, s->setup_buf, p->iov.size); + s->setup_index = 0; + p->actual_length = 0; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; +@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) + static void do_parameter(USBDevice *s, USBPacket *p) + { + int i, request, value, index; ++ unsigned int setup_len; + + for (i = 0; i < 8; i++) { + s->setup_buf[i] = p->parameter >> (i*8); + } + + s->setup_state = SETUP_STATE_PARAM; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; + s->setup_index = 0; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; + index = (s->setup_buf[5] << 8) | s->setup_buf[4]; + +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, 
sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + if (p->pid == USB_TOKEN_OUT) { + usb_packet_copy(p, s->data_buf, s->setup_len); +-- +2.27.0 + diff --git a/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch new file mode 100644 index 0000000..8f08256 --- /dev/null +++ b/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch @@ -0,0 +1,106 @@ +From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:30 +0000 +Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200114202331.51831-2-dgilbert@redhat.com> +Patchwork-id: 93344 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: "Dr. David Alan Gilbert" + +I've got a case where usbredir_write manages to call back into itself +via spice; this patch causes the recursion to fail (0 bytes) the write; +this seems to avoid the deadlock I was previously seeing. + +I can't say I fully understand the interaction of usbredir and spice; +but there are a few similar guards in spice and usbredir +to catch other cases especially ones also related to spice_server_char_device_wakeup + +This case seems to be triggered by repeated migration+repeated +reconnection of the viewer; but my debugging suggests the migration +finished before this hits. 
+ +The backtrace of the hang looks like: + reds_handle_ticket + reds_handle_other_links + reds_channel_do_link + red_channel_connect + spicevmc_connect + usbredir_create_parser + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + spice_chr_write + spice_server_char_device_wakeup + red_char_device_wakeup + red_char_device_write_to_device + vmc_write + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + qemu_mutex_lock_impl + +and we fail as we land through qemu_chr_write_buffer's lock +twice. + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> +Signed-off-by: Gerd Hoffmann +(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) +Signed-off-by: Danilo C. L. de Paula +--- + hw/usb/redirect.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index e0f5ca6..97f2c3a 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -113,6 +113,7 @@ struct USBRedirDevice { + /* Properties */ + CharBackend cs; + bool enable_streams; ++ bool in_write; + uint8_t debug; + int32_t bootindex; + char *filter_str; +@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + return 0; + } + ++ /* Recursion check */ ++ if (dev->in_write) { ++ DPRINTF("usbredir_write recursion\n"); ++ return 0; ++ } ++ dev->in_write = true; ++ + r = qemu_chr_fe_write(&dev->cs, data, count); + if (r < count) { + if (!dev->watch) { +@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + r = 0; + } + } ++ dev->in_write = false; + return r; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-util-add-slirp_fmt-helpers.patch b/SOURCES/kvm-util-add-slirp_fmt-helpers.patch new file mode 100644 index 0000000..31af599 --- /dev/null +++ b/SOURCES/kvm-util-add-slirp_fmt-helpers.patch @@ -0,0 
+1,140 @@ +From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:48 +0000 +Subject: [PATCH 2/7] util: add slirp_fmt() helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-2-jmaloy@redhat.com> +Patchwork-id: 93824 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() in libslirp assume that snprintf() returns +"only" the number of bytes written (excluding terminating NUL). + +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Introduce slirp_fmt() that handles several pathological cases the +way libslirp usually expects: + +- treat error as fatal (instead of silently returning -1) + +- fmt0() will always \0 end + +- return the number of bytes actually written (instead of what would +have been written, which would usually result in OOB later), including +the ending \0 for fmt0() + +- warn if truncation happened (instead of ignoring) + +Other less common cases can still be handled with strcpy/snprintf() etc. + +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. 
de Paula +--- + slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + slirp/src/util.h | 3 +++ + 2 files changed, 65 insertions(+) + +diff --git a/slirp/src/util.c b/slirp/src/util.c +index e596087..e3b6257 100644 +--- a/slirp/src/util.c ++++ b/slirp/src/util.c +@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) + } + *q = '\0'; + } ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("vsnprintf() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("vsnprintf() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +index 3c6223c..0558dfc 100644 +--- a/slirp/src/util.h ++++ b/slirp/src/util.h +@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) + + void slirp_pstrcpy(char *buf, int buf_size, const char *str); + ++int slirp_fmt(char *str, size_t size, const char *format, ...); ++int slirp_fmt0(char *str, size_t size, const char *format, ...); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch b/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch new file mode 100644 index 0000000..c515676 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch @@ -0,0 +1,175 @@ +From 58edd0fba4d9e98edfeb16139467d6035a1f4e61 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:42 -0400 +Subject: [PATCH 08/12] vfio-ccw: Add support for the CRW region and IRQ + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-9-cohuck@redhat.com> +Patchwork-id: 97698 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 8/9] vfio-ccw: Add support for the CRW region and IRQ +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Farhan Ali + +The crw region can be used to obtain information about +Channel Report Words (CRW) from vfio-ccw driver. + +Currently only channel-path related CRWs are passed to +QEMU from vfio-ccw driver. 
+ +Signed-off-by: Farhan Ali +Signed-off-by: Eric Farman +Reviewed-by: Cornelia Huck +Message-Id: <20200505125757.98209-7-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit f030532f2ad6eeb200034915e9c6357cce81b538) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/ccw.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 73 insertions(+) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 94a0d9840d..b72a505893 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -44,7 +44,11 @@ struct VFIOCCWDevice { + uint64_t schib_region_size; + uint64_t schib_region_offset; + struct ccw_schib_region *schib_region; ++ uint64_t crw_region_size; ++ uint64_t crw_region_offset; ++ struct ccw_crw_region *crw_region; + EventNotifier io_notifier; ++ EventNotifier crw_notifier; + bool force_orb_pfch; + bool warned_orb_pfch; + }; +@@ -254,6 +258,44 @@ static void vfio_ccw_reset(DeviceState *dev) + ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); + } + ++static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev) ++{ ++ struct ccw_crw_region *region = vcdev->crw_region; ++ CRW crw; ++ int size; ++ ++ /* Keep reading CRWs as long as data is returned */ ++ do { ++ memset(region, 0, sizeof(*region)); ++ size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size, ++ vcdev->crw_region_offset); ++ ++ if (size == -1) { ++ error_report("vfio-ccw: Read crw region failed with errno=%d", ++ errno); ++ break; ++ } ++ ++ if (region->crw == 0) { ++ /* No more CRWs to queue */ ++ break; ++ } ++ ++ memcpy(&crw, ®ion->crw, sizeof(CRW)); ++ ++ css_crw_add_to_queue(crw); ++ } while (1); ++} ++ ++static void vfio_ccw_crw_notifier_handler(void *opaque) ++{ ++ VFIOCCWDevice *vcdev = opaque; ++ ++ while (event_notifier_test_and_clear(&vcdev->crw_notifier)) { ++ vfio_ccw_crw_read(vcdev); ++ } ++} ++ + static void vfio_ccw_io_notifier_handler(void *opaque) + { + VFIOCCWDevice *vcdev = opaque; +@@ -340,6 +382,10 @@ static void 
vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, + notifier = &vcdev->io_notifier; + fd_read = vfio_ccw_io_notifier_handler; + break; ++ case VFIO_CCW_CRW_IRQ_INDEX: ++ notifier = &vcdev->crw_notifier; ++ fd_read = vfio_ccw_crw_notifier_handler; ++ break; + default: + error_setg(errp, "vfio: Unsupported device irq(%d)", irq); + return; +@@ -391,6 +437,9 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, + case VFIO_CCW_IO_IRQ_INDEX: + notifier = &vcdev->io_notifier; + break; ++ case VFIO_CCW_CRW_IRQ_INDEX: ++ notifier = &vcdev->crw_notifier; ++ break; + default: + error_report("vfio: Unsupported device irq(%d)", irq); + return; +@@ -468,10 +517,24 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) + vcdev->schib_region = g_malloc(info->size); + } + ++ ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, ++ VFIO_REGION_SUBTYPE_CCW_CRW, &info); ++ ++ if (!ret) { ++ vcdev->crw_region_size = info->size; ++ if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) { ++ error_setg(errp, "vfio: Unexpected size of the CRW region"); ++ goto out_err; ++ } ++ vcdev->crw_region_offset = info->offset; ++ vcdev->crw_region = g_malloc(info->size); ++ } ++ + g_free(info); + return; + + out_err: ++ g_free(vcdev->crw_region); + g_free(vcdev->schib_region); + g_free(vcdev->async_cmd_region); + g_free(vcdev->io_region); +@@ -481,6 +544,7 @@ out_err: + + static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) + { ++ g_free(vcdev->crw_region); + g_free(vcdev->schib_region); + g_free(vcdev->async_cmd_region); + g_free(vcdev->io_region); +@@ -596,6 +660,14 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + goto out_notifier_err; + } + ++ if (vcdev->crw_region) { ++ vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); ++ if (err) { ++ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); ++ goto out_notifier_err; ++ } ++ } ++ + return; + + out_notifier_err: +@@ -620,6 +692,7 @@ static void 
vfio_ccw_unrealize(DeviceState *dev, Error **errp) + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIOGroup *group = vcdev->vdev.group; + ++ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); + vfio_ccw_put_region(vcdev); + vfio_ccw_put_device(vcdev); +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch b/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch new file mode 100644 index 0000000..667e5cf --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch @@ -0,0 +1,254 @@ +From b73e3e52f76db823d7bffe3f705f575ca413863b Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:39 -0400 +Subject: [PATCH 05/12] vfio-ccw: Add support for the schib region + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-6-cohuck@redhat.com> +Patchwork-id: 97697 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 5/9] vfio-ccw: Add support for the schib region +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Farhan Ali + +The schib region can be used to obtain the latest SCHIB from the host +passthrough subchannel. Since the guest SCHIB is virtualized, +we currently only update the path related information so that the +guest is aware of any path related changes when it issues the +'stsch' instruction. + +Signed-off-by: Farhan Ali +Signed-off-by: Eric Farman +Reviewed-by: Cornelia Huck +Message-Id: <20200505125757.98209-4-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 46ea3841edaff2a7657b8f6c7f474e5e3850cd62) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/css.c | 13 ++++++-- + hw/s390x/s390-ccw.c | 21 +++++++++++++ + hw/vfio/ccw.c | 63 +++++++++++++++++++++++++++++++++++++ + include/hw/s390x/css.h | 3 +- + include/hw/s390x/s390-ccw.h | 1 + + target/s390x/ioinst.c | 3 +- + 6 files changed, 99 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index 844caab408..71fd3f9a00 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -1335,11 +1335,20 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) + } + } + +-int css_do_stsch(SubchDev *sch, SCHIB *schib) ++IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) + { ++ int ret; ++ ++ /* ++ * For some subchannels, we may want to update parts of ++ * the schib (e.g., update path masks from the host device ++ * for passthrough subchannels). ++ */ ++ ret = s390_ccw_store(sch); ++ + /* Use current status. */ + copy_schib_to_guest(schib, &sch->curr_status); +- return 0; ++ return ret; + } + + static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src) +diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c +index 0c5a5b60bd..75b788c95e 100644 +--- a/hw/s390x/s390-ccw.c ++++ b/hw/s390x/s390-ccw.c +@@ -51,6 +51,27 @@ int s390_ccw_clear(SubchDev *sch) + return cdc->handle_clear(sch); + } + ++IOInstEnding s390_ccw_store(SubchDev *sch) ++{ ++ S390CCWDeviceClass *cdc = NULL; ++ int ret = IOINST_CC_EXPECTED; ++ ++ /* ++ * This code is called for both virtual and passthrough devices, ++ * but only applies to to the latter. This ugly check makes that ++ * distinction for us. 
++ */ ++ if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) { ++ cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data); ++ } ++ ++ if (cdc && cdc->handle_store) { ++ ret = cdc->handle_store(sch); ++ } ++ ++ return ret; ++} ++ + static void s390_ccw_get_dev_info(S390CCWDevice *cdev, + char *sysfsdev, + Error **errp) +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 17eb4c4048..859ad646f1 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -41,6 +41,9 @@ struct VFIOCCWDevice { + uint64_t async_cmd_region_size; + uint64_t async_cmd_region_offset; + struct ccw_cmd_region *async_cmd_region; ++ uint64_t schib_region_size; ++ uint64_t schib_region_offset; ++ struct ccw_schib_region *schib_region; + EventNotifier io_notifier; + bool force_orb_pfch; + bool warned_orb_pfch; +@@ -116,6 +119,51 @@ again: + } + } + ++static IOInstEnding vfio_ccw_handle_store(SubchDev *sch) ++{ ++ S390CCWDevice *cdev = sch->driver_data; ++ VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); ++ SCHIB *schib = &sch->curr_status; ++ struct ccw_schib_region *region = vcdev->schib_region; ++ SCHIB *s; ++ int ret; ++ ++ /* schib region not available so nothing else to do */ ++ if (!region) { ++ return IOINST_CC_EXPECTED; ++ } ++ ++ memset(region, 0, sizeof(*region)); ++ ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size, ++ vcdev->schib_region_offset); ++ ++ if (ret == -1) { ++ /* ++ * Device is probably damaged, but store subchannel does not ++ * have a nonzero cc defined for this scenario. Log an error, ++ * and presume things are otherwise fine. ++ */ ++ error_report("vfio-ccw: store region read failed with errno=%d", errno); ++ return IOINST_CC_EXPECTED; ++ } ++ ++ /* ++ * Selectively copy path-related bits of the SCHIB, ++ * rather than copying the entire struct. 
++ */ ++ s = (SCHIB *)region->schib_area; ++ schib->pmcw.pnom = s->pmcw.pnom; ++ schib->pmcw.lpum = s->pmcw.lpum; ++ schib->pmcw.pam = s->pmcw.pam; ++ schib->pmcw.pom = s->pmcw.pom; ++ ++ if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) { ++ schib->scsw.flags |= SCSW_FLAGS_MASK_PNO; ++ } ++ ++ return IOINST_CC_EXPECTED; ++} ++ + static int vfio_ccw_handle_clear(SubchDev *sch) + { + S390CCWDevice *cdev = sch->driver_data; +@@ -382,10 +430,23 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) + vcdev->async_cmd_region = g_malloc0(info->size); + } + ++ ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, ++ VFIO_REGION_SUBTYPE_CCW_SCHIB, &info); ++ if (!ret) { ++ vcdev->schib_region_size = info->size; ++ if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) { ++ error_setg(errp, "vfio: Unexpected size of the schib region"); ++ goto out_err; ++ } ++ vcdev->schib_region_offset = info->offset; ++ vcdev->schib_region = g_malloc(info->size); ++ } ++ + g_free(info); + return; + + out_err: ++ g_free(vcdev->schib_region); + g_free(vcdev->async_cmd_region); + g_free(vcdev->io_region); + g_free(info); +@@ -394,6 +455,7 @@ out_err: + + static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) + { ++ g_free(vcdev->schib_region); + g_free(vcdev->async_cmd_region); + g_free(vcdev->io_region); + } +@@ -569,6 +631,7 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data) + cdc->handle_request = vfio_ccw_handle_request; + cdc->handle_halt = vfio_ccw_handle_halt; + cdc->handle_clear = vfio_ccw_handle_clear; ++ cdc->handle_store = vfio_ccw_handle_store; + } + + static const TypeInfo vfio_ccw_info = { +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index f46bcafb16..7e3a5e7433 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -218,6 +218,7 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); + + int s390_ccw_halt(SubchDev *sch); + int s390_ccw_clear(SubchDev *sch); ++IOInstEnding s390_ccw_store(SubchDev *sch); 
+ + typedef enum { + CSS_IO_ADAPTER_VIRTIO = 0, +@@ -242,7 +243,7 @@ SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, + uint16_t schid); + bool css_subch_visible(SubchDev *sch); + void css_conditional_io_interrupt(SubchDev *sch); +-int css_do_stsch(SubchDev *sch, SCHIB *schib); ++IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib); + bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid); + IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib); + IOInstEnding css_do_xsch(SubchDev *sch); +diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h +index fffb54562f..4a43803ef2 100644 +--- a/include/hw/s390x/s390-ccw.h ++++ b/include/hw/s390x/s390-ccw.h +@@ -37,6 +37,7 @@ typedef struct S390CCWDeviceClass { + IOInstEnding (*handle_request) (SubchDev *sch); + int (*handle_halt) (SubchDev *sch); + int (*handle_clear) (SubchDev *sch); ++ IOInstEnding (*handle_store) (SubchDev *sch); + } S390CCWDeviceClass; + + #endif +diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c +index f40c35c6ff..b6be300cc4 100644 +--- a/target/s390x/ioinst.c ++++ b/target/s390x/ioinst.c +@@ -292,8 +292,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, + sch = css_find_subch(m, cssid, ssid, schid); + if (sch) { + if (css_subch_visible(sch)) { +- css_do_stsch(sch, &schib); +- cc = 0; ++ cc = css_do_stsch(sch, &schib); + } else { + /* Indicate no more subchannels in this css/ss */ + cc = 3; +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-ccw-Fix-error-message.patch b/SOURCES/kvm-vfio-ccw-Fix-error-message.patch new file mode 100644 index 0000000..86d2fdf --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Fix-error-message.patch @@ -0,0 +1,48 @@ +From 7258b1fabcd152c2ad9b61485b869a41d1bc64e2 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:35 -0400 +Subject: [PATCH 01/12] vfio-ccw: Fix error message +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-2-cohuck@redhat.com> +Patchwork-id: 97693 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/9] vfio-ccw: Fix error message +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Boris Fiuczynski + +Signed-off-by: Boris Fiuczynski +Reviewed-by: Eric Farman +Message-Id: <20191128143015.5231-1-fiuczy@linux.ibm.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Cornelia Huck +(cherry picked from commit 91f751dc111b270b1e81d80ac92cf479e7620fa4) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/ccw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 6863f6c69f..3b5520ae75 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -102,7 +102,7 @@ again: + if (errno == EAGAIN) { + goto again; + } +- error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); ++ error_report("vfio-ccw: write I/O region failed with errno=%d", errno); + ret = -errno; + } else { + ret = region->ret_code; +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch b/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch new file mode 100644 index 0000000..8a3514d --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch @@ -0,0 +1,155 @@ +From ee9b03e774641fba8baaf85256706fcc5e8d8efa Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:40 -0400 +Subject: [PATCH 06/12] vfio-ccw: Refactor ccw irq handler + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-7-cohuck@redhat.com> +Patchwork-id: 97695 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 6/9] vfio-ccw: Refactor ccw irq handler +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Eric Farman + +Make it easier to add new ones in the future. 
+ +Signed-off-by: Eric Farman +Reviewed-by: Cornelia Huck +Message-Id: <20200505125757.98209-5-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 690e29b91102ac69810b35fe72cd90bc9fa1fff7) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/ccw.c | 58 +++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 42 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 859ad646f1..94a0d9840d 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -324,22 +324,36 @@ read_err: + css_inject_io_interrupt(sch); + } + +-static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) ++static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, ++ unsigned int irq, ++ Error **errp) + { + VFIODevice *vdev = &vcdev->vdev; + struct vfio_irq_info *irq_info; + size_t argsz; + int fd; ++ EventNotifier *notifier; ++ IOHandler *fd_read; ++ ++ switch (irq) { ++ case VFIO_CCW_IO_IRQ_INDEX: ++ notifier = &vcdev->io_notifier; ++ fd_read = vfio_ccw_io_notifier_handler; ++ break; ++ default: ++ error_setg(errp, "vfio: Unsupported device irq(%d)", irq); ++ return; ++ } + +- if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { +- error_setg(errp, "vfio: unexpected number of io irqs %u", ++ if (vdev->num_irqs < irq + 1) { ++ error_setg(errp, "vfio: unexpected number of irqs %u", + vdev->num_irqs); + return; + } + + argsz = sizeof(*irq_info); + irq_info = g_malloc0(argsz); +- irq_info->index = VFIO_CCW_IO_IRQ_INDEX; ++ irq_info->index = irq; + irq_info->argsz = argsz; + if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, + irq_info) < 0 || irq_info->count < 1) { +@@ -347,37 +361,49 @@ static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) + goto out_free_info; + } + +- if (event_notifier_init(&vcdev->io_notifier, 0)) { ++ if (event_notifier_init(notifier, 0)) { + error_setg_errno(errp, errno, +- "vfio: Unable to init event notifier for IO"); ++ "vfio: Unable to init 
event notifier for irq (%d)", ++ irq); + goto out_free_info; + } + +- fd = event_notifier_get_fd(&vcdev->io_notifier); +- qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); ++ fd = event_notifier_get_fd(notifier); ++ qemu_set_fd_handler(fd, fd_read, NULL, vcdev); + +- if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, ++ if (vfio_set_irq_signaling(vdev, irq, 0, + VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { + qemu_set_fd_handler(fd, NULL, NULL, vcdev); +- event_notifier_cleanup(&vcdev->io_notifier); ++ event_notifier_cleanup(notifier); + } + + out_free_info: + g_free(irq_info); + } + +-static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) ++static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, ++ unsigned int irq) + { + Error *err = NULL; ++ EventNotifier *notifier; ++ ++ switch (irq) { ++ case VFIO_CCW_IO_IRQ_INDEX: ++ notifier = &vcdev->io_notifier; ++ break; ++ default: ++ error_report("vfio: Unsupported device irq(%d)", irq); ++ return; ++ } + +- if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, ++ if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0, + VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); + } + +- qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), ++ qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, vcdev); +- event_notifier_cleanup(&vcdev->io_notifier); ++ event_notifier_cleanup(notifier); + } + + static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) +@@ -565,7 +591,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + goto out_region_err; + } + +- vfio_ccw_register_io_notifier(vcdev, &err); ++ vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err); + if (err) { + goto out_notifier_err; + } +@@ -594,7 +620,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIOGroup *group = 
vcdev->vdev.group; + +- vfio_ccw_unregister_io_notifier(vcdev); ++ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); + vfio_ccw_put_region(vcdev); + vfio_ccw_put_device(vcdev); + vfio_put_group(group); +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch b/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch new file mode 100644 index 0000000..1741f4b --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch @@ -0,0 +1,73 @@ +From 30906c9c78af2710a2b86c096cc7b18bbc4b4e69 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:38 -0400 +Subject: [PATCH 04/12] vfio-ccw: Refactor cleanup of regions + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-5-cohuck@redhat.com> +Patchwork-id: 97694 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 4/9] vfio-ccw: Refactor cleanup of regions +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Eric Farman + +While we're at it, add a g_free() for the async_cmd_region that +is the last thing currently created. g_free() knows how to handle +NULL pointers, so this makes it easier to remember what cleanups +need to be performed when new regions are added. + +Signed-off-by: Eric Farman +Reviewed-by: Cornelia Huck +Message-Id: <20200505125757.98209-3-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 2a3b9cbaa7b25a4db4cdcfe1c65279c5464f2923) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/vfio/ccw.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 6bc612b5b7..17eb4c4048 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -363,8 +363,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) + vcdev->io_region_size = info->size; + if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { + error_setg(errp, "vfio: Unexpected size of the I/O region"); +- g_free(info); +- return; ++ goto out_err; + } + + vcdev->io_region_offset = info->offset; +@@ -377,15 +376,20 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) + vcdev->async_cmd_region_size = info->size; + if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { + error_setg(errp, "vfio: Unexpected size of the async cmd region"); +- g_free(vcdev->io_region); +- g_free(info); +- return; ++ goto out_err; + } + vcdev->async_cmd_region_offset = info->offset; + vcdev->async_cmd_region = g_malloc0(info->size); + } + + g_free(info); ++ return; ++ ++out_err: ++ g_free(vcdev->async_cmd_region); ++ g_free(vcdev->io_region); ++ g_free(info); ++ return; + } + + static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch b/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch new file mode 100644 index 0000000..da2fc5c --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch @@ -0,0 +1,61 @@ +From d5f5a307f3396064d29ef0d300c7377756dd165b Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 23 Jun 2020 09:25:36 -0400 +Subject: [PATCH 02/12] vfio-ccw: allow non-prefetch ORBs + +RH-Author: Cornelia Huck +Message-id: <20200623092543.358315-3-cohuck@redhat.com> +Patchwork-id: 97692 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/9] vfio-ccw: allow non-prefetch ORBs +Bugzilla: 1660916 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +From: Jared Rossi + 
+Remove the explicit prefetch check when using vfio-ccw devices. +This check does not trigger in practice as all Linux channel programs +are intended to use prefetch. + +Newer Linux kernel versions do not require to force the PFCH flag with +vfio-ccw devices anymore. + +Signed-off-by: Jared Rossi +Reviewed-by: Eric Farman +Message-Id: <20200512181535.18630-2-jrossi@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 24e58a7b1d411627e326144030a20dcf0093fed0) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/ccw.c | 13 +++---------- + 1 file changed, 3 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 3b5520ae75..6bc612b5b7 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -74,16 +74,9 @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) + struct ccw_io_region *region = vcdev->io_region; + int ret; + +- if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { +- if (!(vcdev->force_orb_pfch)) { +- warn_once_pfch(vcdev, sch, "requires PFCH flag set"); +- sch_gen_unit_exception(sch); +- css_inject_io_interrupt(sch); +- return IOINST_CC_EXPECTED; +- } else { +- sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; +- warn_once_pfch(vcdev, sch, "PFCH flag forced"); +- } ++ if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) { ++ sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; ++ warn_once_pfch(vcdev, sch, "PFCH flag forced"); + } + + QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch b/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch new file mode 100644 index 0000000..81cf80e --- /dev/null +++ b/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch @@ -0,0 +1,75 @@ +From f01098bb86c12f485895f38f7a24170ec84b60b6 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 8 Jun 2020 16:25:21 -0400 +Subject: [PATCH 42/42] vfio/nvlink: Remove exec 
permission to avoid SELinux + AVCs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20200608162521.382858-2-gkurz@redhat.com> +Patchwork-id: 97459 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] vfio/nvlink: Remove exec permission to avoid SELinux AVCs +Bugzilla: 1823275 +RH-Acked-by: David Gibson +RH-Acked-by: Laurent Vivier +RH-Acked-by: Philippe Mathieu-Daudé + +From: Leonardo Bras + +If SELinux is setup without 'execmem' permission for qemu, all mmap +with (PROT_WRITE | PROT_EXEC) will fail and print a warning in +SELinux log. + +If "nvlink2-mr" memory allocation fails (first diff), it will cause +guest NUMA nodes to not be correctly configured (V100 memory will +not be visible for guest, nor its NUMA nodes). + +Not having 'execmem' permission is interesting for virtual machines to +avoid buffer-overflow based attacks, and it's adopted in distros +like RHEL. + +So, removing the PROT_EXEC flag seems the right thing to do. + +Browsing some other code that mmaps memory for usage with +memory_region_init_ram_device_ptr, I could notice it's usual to +not have PROT_EXEC (only PROT_READ | PROT_WRITE), so it should be +no problem around this. + +Signed-off-by: Leonardo Bras +Message-Id: <20200501055448.286518-1-leobras.c@gmail.com> +Acked-by: Alex Williamson +Signed-off-by: David Gibson +(cherry picked from commit 9c7c0407028355ca83349b8a60fddfad46f2ebd8) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/vfio/pci-quirks.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 4505ffe48a..1c5fe014cf 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -2237,7 +2237,7 @@ int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp) + } + cap = (void *) hdr; + +- p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC, ++ p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE, + MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset); + if (p == MAP_FAILED) { + ret = -errno; +@@ -2297,7 +2297,7 @@ int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp) + + /* Some NVLink bridges may not have assigned ATSD */ + if (atsdreg->size) { +- p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC, ++ p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE, + MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset); + if (p == MAP_FAILED) { + ret = -errno; +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch new file mode 100644 index 0000000..d416e0f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch @@ -0,0 +1,58 @@ +From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 8 Jan 2020 15:04:57 +0000 +Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered + +RH-Author: Peter Xu +Message-id: <20200108150457.12324-2-peterx@redhat.com> +Patchwork-id: 93291 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered +Bugzilla: 1782678 +RH-Acked-by: Alex Williamson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Auger Eric +RH-Acked-by: Jens Freimann + +The kvm irqchip notifier is only registered if the device supports +INTx, however it's unconditionally removed. 
If the assigned device +does not support INTx, this will cause QEMU to crash when unplugging +the device from the system. Change it to conditionally remove the +notifier only if the notify hook is setup. + +CC: Eduardo Habkost +CC: David Gibson +CC: Alex Williamson +Cc: qemu-stable@nongnu.org # v4.2 +Reported-by: yanghliu@redhat.com +Debugged-by: Eduardo Habkost +Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 +Signed-off-by: Peter Xu +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Alex Williamson +(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 309535f..d717520 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { + timer_free(vdev->intx.mmap_timer); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch b/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch new file mode 100644 index 0000000..c41a14c --- /dev/null +++ b/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch @@ -0,0 +1,53 @@ +From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:11 +0100 +Subject: [PATCH 002/116] vhost: Add names to section rounded warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-2-dgilbert@redhat.com> +Patchwork-id: 93450 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +Add the memory region names to section rounding/alignment +warnings. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 4da0d5a..774d87d 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, + * match up in the same RAMBlock if they do. 
+ */ + if (mrs_gpa < prev_gpa_start) { +- error_report("%s:Section rounded to %"PRIx64 +- " prior to previous %"PRIx64, +- __func__, mrs_gpa, prev_gpa_start); ++ error_report("%s:Section '%s' rounded to %"PRIx64 ++ " prior to previous '%s' %"PRIx64, ++ __func__, section->mr->name, mrs_gpa, ++ prev_sec->mr->name, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch b/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch new file mode 100644 index 0000000..e082ce8 --- /dev/null +++ b/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch @@ -0,0 +1,97 @@ +From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:12 +0100 +Subject: [PATCH 003/116] vhost: Only align sections for vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-3-dgilbert@redhat.com> +Patchwork-id: 93452 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +I added hugepage alignment code in c1ece84e7c9 to deal with +vhost-user + postcopy which needs aligned pages when using userfault. +However, on x86 the lower 2MB of address space tends to be shotgun'd +with small fragments around the 512-640k range - e.g. video RAM, and +with HyperV synic pages tend to sit around there - again splitting +it up. The alignment code complains with a 'Section rounded to ...' +error and gives up. + +Since vhost-user already filters out devices without an fd +(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be +affected by those overlaps. 
+ +Turn the alignment off on vhost-kernel so that it doesn't try +and align, and thus won't hit the rounding issues. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Paolo Bonzini +(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- + 1 file changed, 18 insertions(+), 16 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 774d87d..25fd469 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, + uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + RAMBlock *mrs_rb = section->mr->ram_block; +- size_t mrs_page = qemu_ram_pagesize(mrs_rb); + + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- /* Round the section to it's page size */ +- /* First align the start down to a page boundary */ +- uint64_t alignage = mrs_host & (mrs_page - 1); +- if (alignage) { +- mrs_host -= alignage; +- mrs_size += alignage; +- mrs_gpa -= alignage; +- } +- /* Now align the size up to a page boundary */ +- alignage = mrs_size & (mrs_page - 1); +- if (alignage) { +- mrs_size += mrs_page - alignage; +- } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ /* Round the section to it's page size */ ++ /* First align the start down to a page boundary */ ++ size_t mrs_page = qemu_ram_pagesize(mrs_rb); ++ uint64_t alignage = mrs_host & (mrs_page - 1); ++ if (alignage) { ++ mrs_host -= alignage; ++ mrs_size += alignage; ++ mrs_gpa -= alignage; ++ } ++ /* Now align the size up to a page boundary */ ++ alignage = mrs_size & (mrs_page - 1); ++ if 
(alignage) { ++ mrs_size += mrs_page - alignage; ++ } ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, ++ mrs_host); ++ } + + if (dev->n_tmp_sections) { + /* Since we already have at least one section, lets see if +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-coding-style-fix.patch b/SOURCES/kvm-vhost-coding-style-fix.patch new file mode 100644 index 0000000..4546130 --- /dev/null +++ b/SOURCES/kvm-vhost-coding-style-fix.patch @@ -0,0 +1,56 @@ +From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:13 +0100 +Subject: [PATCH 004/116] vhost: coding style fix +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-4-dgilbert@redhat.com> +Patchwork-id: 93453 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Michael S. Tsirkin" + +Drop a trailing whitespace. Make line shorter. + +Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 25fd469..9edfadc 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + size_t mrs_page = qemu_ram_pagesize(mrs_rb); +@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, + if (alignage) { + mrs_size += mrs_page - alignage; + } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, ++ mrs_size, mrs_host); + } + + if (dev->n_tmp_sections) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch b/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch new file mode 100644 index 0000000..7e1353c --- /dev/null +++ b/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch @@ -0,0 +1,69 @@ +From e06655cfe0fa9473b1e8b311571f36d787472834 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 May 2020 05:54:02 -0400 +Subject: [PATCH 20/42] vhost: correctly turn on VIRTIO_F_IOMMU_PLATFORM + +RH-Author: Thomas Huth +Message-id: <20200529055420.16855-21-thuth@redhat.com> +Patchwork-id: 97041 +O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 20/38] vhost: correctly turn on VIRTIO_F_IOMMU_PLATFORM +Bugzilla: 1828317 +RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Jason Wang + +We turn on 
device IOTLB via VIRTIO_F_IOMMU_PLATFORM unconditionally on +platform without IOMMU support. This can lead to unnecessary IOTLB +transactions which will damage the performance. + +Fix this by checking whether the device is backed by an IOMMU and, +if it is not, disabling device IOTLB. + +Reported-by: Halil Pasic +Tested-by: Halil Pasic +Reviewed-by: Halil Pasic +Signed-off-by: Jason Wang +Message-Id: <20200302042454.24814-1-jasowang@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f7ef7e6e3ba6e994e070cc609eb154339d1c4a11) +Signed-off-by: Danilo C. L. de Paula +--- + hw/virtio/vhost.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 9edfadc81d..9182a00495 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -290,7 +290,14 @@ static int vhost_dev_has_iommu(struct vhost_dev *dev) + { + VirtIODevice *vdev = dev->vdev; + +- return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ /* ++ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support ++ * incremental memory mapping API via IOTLB API. For platform that ++ * does not have IOMMU, there's no need to enable this feature ++ * which may cause unnecessary IOTLB miss/update trnasactions. 
++ */ ++ return vdev->dma_as != &address_space_memory && ++ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + } + + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, +@@ -765,6 +772,9 @@ static int vhost_dev_set_features(struct vhost_dev *dev, + if (enable_log) { + features |= 0x1ULL << VHOST_F_LOG_ALL; + } ++ if (!vhost_dev_has_iommu(dev)) { ++ features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM); ++ } + r = dev->vhost_ops->vhost_set_features(dev, features); + if (r < 0) { + VHOST_OPS_DEBUG("vhost_set_features failed"); +-- +2.27.0 + diff --git a/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch new file mode 100644 index 0000000..e5776e7 --- /dev/null +++ b/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch @@ -0,0 +1,48 @@ +From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:43 +0100 +Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-69-dgilbert@redhat.com> +Patchwork-id: 93519 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When we receive an unexpected message type on the slave fd, print +the type. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 02a9b25..e4f46ec 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) + fd[0]); + break; + default: +- error_report("Received unexpected msg type."); ++ error_report("Received unexpected msg type: %d.", hdr.request); + ret = -EINVAL; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch b/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch new file mode 100644 index 0000000..5904e82 --- /dev/null +++ b/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch @@ -0,0 +1,59 @@ +From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:37 +0100 +Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-3-dgilbert@redhat.com> +Patchwork-id: 93458 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Marc-André Lureau + +The property doesn't make much sense for a vhost-user device. + +Signed-off-by: Marc-André Lureau +Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs.c | 1 - + include/hw/virtio/vhost-user-fs.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index f0df7f4..ca0b7fc 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -263,7 +263,6 @@ static Property vuf_properties[] = { + DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, + conf.num_request_queues, 1), + DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), +- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 539885b..9ff1bdb 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -28,7 +28,6 @@ typedef struct { + char *tag; + uint16_t num_request_queues; + uint16_t queue_size; +- char *vhostfd; + } VHostUserFSConf; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch new file mode 100644 index 0000000..3a50632 --- /dev/null +++ b/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch @@ -0,0 +1,52 @@ +From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A1n=20Tomko?= +Date: Fri, 21 Feb 2020 09:49:23 +0000 +Subject: [PATCH] vhost-user-gpu: Drop trailing json comma +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Ján Tomko +Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> +Patchwork-id: 94019 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma +Bugzilla: 1805334 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Laszlo Ersek 
+RH-Acked-by: Stefan Hajnoczi + +From: Cole Robinson + +Trailing comma is not valid json: + +$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq +parse error: Expected another key-value pair at line 5, column 1 + +Signed-off-by: Cole Robinson +Reviewed-by: Marc-André Lureau +Reviewed-by: Li Qiang +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) +Signed-off-by: Ján Tomko +Signed-off-by: Danilo C. L. de Paula +--- + contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +index 658b545..f5edd09 100644 +--- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in ++++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +@@ -1,5 +1,5 @@ + { + "description": "QEMU vhost-user-gpu", + "type": "gpu", +- "binary": "@libexecdir@/vhost-user-gpu", ++ "binary": "@libexecdir@/vhost-user-gpu" + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch new file mode 100644 index 0000000..ed10701 --- /dev/null +++ b/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch @@ -0,0 +1,80 @@ +From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:28 +0000 +Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-2-jusual@redhat.com> +Patchwork-id: 93980 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add ability to delete vq through a pointer +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe 
Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Devices tend to maintain vq pointers, allow deleting them through a vq pointer. + +Signed-off-by: Michael S. Tsirkin +Reviewed-by: David Hildenbrand +Reviewed-by: David Hildenbrand +(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) +Signed-off-by: Danilo C. L. de Paula +--- + hw/virtio/virtio.c | 15 ++++++++++----- + include/hw/virtio/virtio.h | 2 ++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3211135..d63a369 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + return &vdev->vq[i]; + } + ++void virtio_delete_queue(VirtQueue *vq) ++{ ++ vq->vring.num = 0; ++ vq->vring.num_default = 0; ++ vq->handle_output = NULL; ++ vq->handle_aio_output = NULL; ++ g_free(vq->used_elems); ++} ++ + void virtio_del_queue(VirtIODevice *vdev, int n) + { + if (n < 0 || n >= VIRTIO_QUEUE_MAX) { + abort(); + } + +- vdev->vq[n].vring.num = 0; +- vdev->vq[n].vring.num_default = 0; +- vdev->vq[n].handle_output = NULL; +- vdev->vq[n].handle_aio_output = NULL; +- g_free(vdev->vq[n].used_elems); ++ virtio_delete_queue(&vdev->vq[n]); + } + + static void virtio_set_isr(VirtIODevice *vdev, int value) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 6a20442..91167f6 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + + void virtio_del_queue(VirtIODevice *vdev, int n); + ++void virtio_delete_queue(VirtQueue *vq); ++ + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); + void virtqueue_flush(VirtQueue *vq, unsigned int count); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch 
b/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch new file mode 100644 index 0000000..9e46be1 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch @@ -0,0 +1,203 @@ +From fdd1f3bf672ad8bb0a6db896ec8cbc797c31da1f Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Wed, 24 Jun 2020 13:24:53 -0400 +Subject: [PATCH 11/12] virtio-blk: On restart, process queued requests in the + proper context + +RH-Author: Sergio Lopez Pascual +Message-id: <20200624132453.111276-3-slp@redhat.com> +Patchwork-id: 97798 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] virtio-blk: On restart, process queued requests in the proper context +Bugzilla: +RH-Acked-by: John Snow +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Kevin Wolf + +On restart, we were scheduling a BH to process queued requests, which +would run before starting up the data plane, leading to those requests +being assigned and started on coroutines on the main context. + +This could cause requests to be wrongly processed in parallel from +different threads (the main thread and the iothread managing the data +plane), potentially leading to multiple issues. 
+ +For example, stopping and resuming a VM multiple times while the guest +is generating I/O on a virtio_blk device can trigger a crash with a +stack tracing looking like this one: + +<------> + Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) + at block/linux-aio.c:236 + #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 + #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:520 + #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) + at util/aio-posix.c:562 + #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:597 + #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 + #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 + #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 + #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 + #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 + + Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at 
util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) + at block/linux-aio.c:375 + #6 0x00005567a12f4af2 in laio_co_submit + (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) + at block/linux-aio.c:394 + #7 0x00005567a12f1803 in raw_co_prw + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) + at block/file-posix.c:1892 + #8 0x00005567a12f1941 in raw_co_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) + at block/file-posix.c:1925 + #9 0x00005567a12fe3e1 in bdrv_driver_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) + at block/io.c:1183 + #10 0x00005567a1300340 in bdrv_aligned_pwritev + (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 + #11 0x00005567a1300b29 in bdrv_co_pwritev_part + (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) + at block/io.c:2137 + #12 0x00005567a12baba1 in qcow2_co_pwritev_task + (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 + #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 + #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 + #15 0x00005567a13cf00c in 
coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 + #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 + #17 0x00007ff6626f1350 in () + #18 0x0000000000000000 in () +<------> + +This is also known to cause crashes with this message (assertion +failed): + + aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-3-slp@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 49b44549ace7890fffdf027fd3695218ee7f1121) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + hw/block/dataplane/virtio-blk.c | 8 ++++++++ + hw/block/virtio-blk.c | 18 ++++++++++++------ + include/hw/virtio/virtio-blk.h | 2 +- + 3 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 119906a5fe..ac495fd72a 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -220,6 +220,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + ++ /* Process queued requests before the ones in vring */ ++ virtio_blk_process_queued_requests(vblk, false); ++ + /* Kick right away to begin processing requests already in vring */ + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); +@@ -239,6 +242,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + return 0; + + fail_guest_notifiers: ++ /* ++ * If we failed to set up the guest notifiers queued requests will be ++ * processed on the main context. 
++ */ ++ virtio_blk_process_queued_requests(vblk, false); + vblk->dataplane_disabled = true; + s->starting = false; + vblk->dataplane_started = true; +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 6ff29a05d6..493a263fa6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -819,7 +819,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-void virtio_blk_process_queued_requests(VirtIOBlock *s) ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) + { + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; +@@ -847,7 +847,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } +- blk_dec_in_flight(s->conf.conf.blk); ++ if (is_bh) { ++ blk_dec_in_flight(s->conf.conf.blk); ++ } + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + +@@ -858,21 +860,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) + qemu_bh_delete(s->bh); + s->bh = NULL; + +- virtio_blk_process_queued_requests(s); ++ virtio_blk_process_queued_requests(s, true); + } + + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { + VirtIOBlock *s = opaque; ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); ++ VirtioBusState *bus = VIRTIO_BUS(qbus); + + if (!running) { + return; + } + +- if (!s->bh) { +- /* FIXME The data plane is not started yet, so these requests are +- * processed in the main thread. */ ++ /* ++ * If ioeventfd is enabled, don't schedule the BH here as queued ++ * requests will be processed while starting the data plane. 
++ */ ++ if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { + s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), + virtio_blk_dma_restart_bh, s); + blk_inc_in_flight(s->conf.conf.blk); +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cf8eea2f58..e77f0db3b0 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +-void virtio_blk_process_queued_requests(VirtIOBlock *s); ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); + + #endif +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch b/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch new file mode 100644 index 0000000..148045d --- /dev/null +++ b/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch @@ -0,0 +1,83 @@ +From 73d83d8880e85eedc22c9651b67d1eacd5de5ff4 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Wed, 24 Jun 2020 13:24:52 -0400 +Subject: [PATCH 10/12] virtio-blk: Refactor the code that processes queued + requests + +RH-Author: Sergio Lopez Pascual +Message-id: <20200624132453.111276-2-slp@redhat.com> +Patchwork-id: 97797 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] virtio-blk: Refactor the code that processes queued requests +Bugzilla: +RH-Acked-by: John Snow +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Kevin Wolf + +Move the code that processes queued requests from +virtio_blk_dma_restart_bh() to its own, non-static, function. This +will allow us to call it from the virtio_blk_data_plane_start() in a +future patch. + +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-2-slp@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7aa1c247b466870b0704d3ccdc3755e5e7394dca) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/virtio-blk.c | 16 +++++++++++----- + include/hw/virtio/virtio-blk.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index c4e55fb3de..6ff29a05d6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -819,15 +819,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-static void virtio_blk_dma_restart_bh(void *opaque) ++void virtio_blk_process_queued_requests(VirtIOBlock *s) + { +- VirtIOBlock *s = opaque; + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + s->rq = NULL; + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +@@ -855,6 +851,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + ++static void virtio_blk_dma_restart_bh(void *opaque) ++{ ++ VirtIOBlock *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ virtio_blk_process_queued_requests(s); ++} ++ + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cddcfbebe9..cf8eea2f58 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); ++void virtio_blk_process_queued_requests(VirtIOBlock *s); + + #endif +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch b/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch new file mode 100644 index 0000000..2dffc01 --- /dev/null +++ b/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch @@ -0,0 +1,158 @@ +From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: 
Thu, 9 Jan 2020 11:13:25 +0000 +Subject: [PATCH 3/5] virtio: don't enable notifications during polling + +RH-Author: Stefan Hajnoczi +Message-id: <20200109111325.559557-2-stefanha@redhat.com> +Patchwork-id: 93298 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications during polling +Bugzilla: 1789301 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Michael S. Tsirkin + +Virtqueue notifications are not necessary during polling, so we disable +them. This allows the guest driver to avoid MMIO vmexits. +Unfortunately the virtio-blk and virtio-scsi handler functions re-enable +notifications, defeating this optimization. + +Fix virtio-blk and virtio-scsi emulation so they leave notifications +disabled. The key thing to remember for correctness is that polling +always checks one last time after ending its loop, therefore it's safe +to lose the race when re-enabling notifications at the end of polling. + +There is a measurable performance improvement of 5-10% with the null-co +block driver. Real-life storage configurations will see a smaller +improvement because the MMIO vmexit overhead contributes less to +latency. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209210957.65087-1-stefanha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/virtio-blk.c | 9 +++++++-- + hw/scsi/virtio-scsi.c | 9 +++++++-- + hw/virtio/virtio.c | 12 ++++++------ + include/hw/virtio/virtio.h | 1 + + 4 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 4c357d2..c4e55fb 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + { + VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_io_plug(s->blk); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_blk_get_request(s, vq))) { + progress = true; +@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } while (!virtio_queue_empty(vq)); + + if (mrb.num_reqs) { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 54108c0..e2cd1df 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_scsi_pop_req(s, vq))) { + progress = true; +@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 04716b5..3211135 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) + } + } + ++bool virtio_queue_get_notification(VirtQueue *vq) ++{ ++ return vq->notification; ++} ++ + void virtio_queue_set_notification(VirtQueue *vq, int enable) + { + vq->notification = enable; +@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) + { + EventNotifier *n = opaque; + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); +- bool progress; + + if (!vq->vring.desc || virtio_queue_empty(vq)) { + return false; + } + +- progress = virtio_queue_notify_aio_vq(vq); +- +- /* In case the handler function re-enabled notifications */ +- virtio_queue_set_notification(vq, 0); +- return progress; ++ return virtio_queue_notify_aio_vq(vq); + } + + static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c32a815..6a20442 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); + + void virtio_notify_config(VirtIODevice *vdev); + ++bool virtio_queue_get_notification(VirtQueue *vq); + void virtio_queue_set_notification(VirtQueue *vq, int enable); + + int virtio_queue_ready(VirtQueue *vq); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch new file mode 100644 index 0000000..9a69ed1 --- /dev/null +++ b/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch @@ -0,0 +1,60 @@ +From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:36 +0100 +Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-2-dgilbert@redhat.com> +Patchwork-id: 93455 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The following MSI-X vectors are required: + * VIRTIO Configuration Change + * hiprio virtqueue + * requests virtqueues + +Fix the calculation to reserve enough MSI-X vectors. Otherwise guest +drivers fall back to a sub-optimal configuration where all virtqueues +share a single vector. + +This change does not break live migration compatibility since +vhost-user-fs-pci devices are not migratable yet. + +Reported-by: Vivek Goyal +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209110759.35227-1-stefanha@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs-pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c +index 933a3f2..e3a649d 100644 +--- a/hw/virtio/vhost-user-fs-pci.c ++++ b/hw/virtio/vhost-user-fs-pci.c +@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { +- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; ++ /* Also reserve config change and hiprio queue vectors */ ++ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; + } + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch new file mode 100644 index 0000000..16eb1da --- /dev/null +++ b/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch @@ -0,0 +1,42 @@ +From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:29 +0000 +Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-3-jusual@redhat.com> +Patchwork-id: 93981 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Let's make sure calling this twice is harmless - +no known instances, but seems safer. + +Suggested-by: Pan Nengyuan +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d63a369..e6a9ba4 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_output = NULL; + vq->handle_aio_output = NULL; + g_free(vq->used_elems); ++ vq->used_elems = NULL; + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch new file mode 100644 index 0000000..c21c699 --- /dev/null +++ b/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch @@ -0,0 +1,49 @@ +From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:31 +0000 +Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-5-jusual@redhat.com> +Patchwork-id: 93983 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +If the control queue is not deleted together with TX/RX, it +later will be ignored in freeing cache resources and hot +unplug will not be completed. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index db3d7c3..f325440 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) + for (i = 0; i < max_queues; i++) { + virtio_net_del_queue(n, i); + } +- ++ /* delete also control vq */ ++ virtio_del_queue(vdev, max_queues * 2); + qemu_announce_timer_del(&n->announce_timer, false); + g_free(n->vqs); + qemu_del_nic(n->nic); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch b/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch new file mode 100644 index 0000000..6044f3d --- /dev/null +++ b/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch @@ -0,0 +1,52 @@ +From 92fb4f6cdde32652352a0a831a2ba815701a4014 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Fri, 3 Jul 2020 12:37:05 -0400 +Subject: [PATCH 4/4] virtio-net: fix removal of failover device + +RH-Author: Juan Quintela +Message-id: <20200703123705.7175-2-quintela@redhat.com> +Patchwork-id: 97901 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] virtio-net: fix removal of failover device +Bugzilla: +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laurent Vivier +RH-Acked-by: Dr. David Alan Gilbert + +If you have a networking device and its virtio failover device, and +you remove them in this order: +- virtio device +- the real device + +You get qemu crash. +See bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1820120 + +Bug exist on qemu 4.2 and 5.0. +But in 5.0 don't shows because commit +77b06bba62034a87cc61a9c8de1309ae3e527d97 + +somehow papers over it. + +CC: Jason Wang +CC: Michael S. Tsirkin + +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f325440d01..dabeb9e720 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3091,6 +3091,7 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) + g_free(n->vlans); + + if (n->failover) { ++ device_listener_unregister(&n->primary_listener); + g_free(n->primary_device_id); + g_free(n->standby_id); + qobject_unref(n->primary_device_dict); +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch new file mode 100644 index 0000000..c9f1086 --- /dev/null +++ b/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch @@ -0,0 +1,46 @@ +From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:30 +0000 +Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-4-jusual@redhat.com> +Patchwork-id: 93982 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +Fix leak of region reference that prevents complete +device deletion on hot unplug. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index e6a9ba4..f644d9a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_aio_output = NULL; + g_free(vq->used_elems); + vq->used_elems = NULL; ++ virtio_virtqueue_reset_region_cache(vq); + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch b/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch new file mode 100644 index 0000000..fec9371 --- /dev/null +++ b/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch @@ -0,0 +1,52 @@ +From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:11 +0100 +Subject: [PATCH 040/116] virtiofs: Add maintainers entry +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-37-dgilbert@redhat.com> +Patchwork-id: 93491 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) +Signed-off-by: Miroslav Rezanina +--- + MAINTAINERS | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 5e5e3e5..d1b3e26 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next + T: git https://github.com/borntraeger/qemu.git s390-next + L: qemu-s390x@nongnu.org + ++virtiofs ++M: Dr. David Alan Gilbert ++M: Stefan Hajnoczi ++S: Supported ++F: tools/virtiofsd/* ++F: hw/virtio/vhost-user-fs* ++F: include/hw/virtio/vhost-user-fs.h ++ + virtio-input + M: Gerd Hoffmann + S: Maintained +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch new file mode 100644 index 0000000..a2b91be --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch @@ -0,0 +1,86 @@ +From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:38 +0100 +Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG + level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-64-dgilbert@redhat.com> +Patchwork-id: 93514 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can identify the specific thread +from the log. + +Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level +so that we can grep the specific thread. 
+ +The log is like as: + + ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [ID: 00000097] unique: 12696, success, outsize: 120 + [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 + [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 + [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 + [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event + [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 + [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 + [ID: 00000071] lo_read(ino=2, size=65536, off=131072) + +Signed-off-by: Masayoshi Mizuma + +Signed-off-by: Dr. David Alan Gilbert + added rework as suggested by Daniel P. Berrangé during review +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ff6910f..f08324f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ g_autofree char *localfmt = NULL; ++ + if (current_log_level < level) { + return; + } + ++ if (current_log_level == FUSE_LOG_DEBUG) { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ fmt = localfmt; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch 
b/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch new file mode 100644 index 0000000..b017bf4 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch @@ -0,0 +1,106 @@ +From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:05 +0100 +Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-31-dgilbert@redhat.com> +Patchwork-id: 93482 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Wire up the building of the virtiofsd in tools. + +virtiofsd relies on Linux-specific system calls and seccomp. Anyone +wishing to port it to other host operating systems should do so +carefully and without reducing security. + +Only allow building on Linux hosts. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Liam Merwick +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 10 ++++++++++ + Makefile.objs | 1 + + tools/virtiofsd/Makefile.objs | 9 +++++++++ + 3 files changed, 20 insertions(+) + create mode 100644 tools/virtiofsd/Makefile.objs + +diff --git a/Makefile b/Makefile +index 4254950..1526775 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,6 +330,10 @@ endif + endif + endif + ++ifdef CONFIG_LINUX ++HELPERS-y += virtiofsd$(EXESUF) ++endif ++ + # Sphinx does not allow building manuals into the same directory as + # the source files, so if we're doing an in-tree QEMU build we must + # build the manuals into a subdirectory (and then install them from +@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ + elf2dmp-obj-y \ + ivshmem-client-obj-y \ + ivshmem-server-obj-y \ ++ virtiofsd-obj-y \ + rdmacm-mux-obj-y \ + libvhost-user-obj-y \ + vhost-user-scsi-obj-y \ +@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + ++ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) ++ $(call LINK, $^) ++endif ++ + vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a + $(call LINK, $^) + +diff --git a/Makefile.objs b/Makefile.objs +index fcf63e1..1a8f288 100644 +--- a/Makefile.objs ++++ b/Makefile.objs +@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ + rdmacm-mux-obj-y = contrib/rdmacm-mux/ + vhost-user-input-obj-y = contrib/vhost-user-input/ + vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ ++virtiofsd-obj-y = tools/virtiofsd/ + + ###################################################################### + trace-events-subdirs = +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +new file mode 100644 +index 0000000..45a8075 +--- /dev/null ++++ 
b/tools/virtiofsd/Makefile.objs +@@ -0,0 +1,9 @@ ++virtiofsd-obj-y = buffer.o \ ++ fuse_opt.o \ ++ fuse_log.o \ ++ fuse_lowlevel.o \ ++ fuse_signals.o \ ++ fuse_virtio.o \ ++ helper.o \ ++ passthrough_ll.o ++ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch new file mode 100644 index 0000000..90150d9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch @@ -0,0 +1,1387 @@ +From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:41 +0100 +Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-7-dgilbert@redhat.com> +Patchwork-id: 93461 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add most of the non-main .c files we need from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_log.c | 40 ++++ + tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_signals.c | 91 +++++++++ + tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 1315 insertions(+) + create mode 100644 tools/virtiofsd/buffer.c + create mode 100644 tools/virtiofsd/fuse_log.c + create mode 100644 tools/virtiofsd/fuse_opt.c + create mode 100644 tools/virtiofsd/fuse_signals.c + create mode 100644 tools/virtiofsd/helper.c + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +new file mode 100644 +index 0000000..5ab9b87 +--- /dev/null ++++ b/tools/virtiofsd/buffer.c +@@ -0,0 +1,321 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2010 Miklos Szeredi ++ ++ Functions for dealing with `struct fuse_buf` and `struct ++ fuse_bufvec`. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_lowlevel.h" ++#include ++#include ++#include ++#include ++ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv) ++{ ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) ++ size = SIZE_MAX; ++ else ++ size += bufv->buf[i].size; ++ } ++ ++ return size; ++} ++ ++static size_t min_size(size_t s1, size_t s2) ++{ ++ return s1 < s2 ? 
s1 : s2; ++} ++ ++static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ src_off += res; ++ dst_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ 
break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ ++ if (res < this_len) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++#ifdef HAVE_SPLICE ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int splice_flags = 0; ++ off_t *srcpos = NULL; ++ off_t *dstpos = NULL; ++ off_t srcpos_val; ++ off_t dstpos_val; ++ ssize_t res; ++ size_t copied = 0; ++ ++ if (flags & FUSE_BUF_SPLICE_MOVE) ++ splice_flags |= SPLICE_F_MOVE; ++ if (flags & FUSE_BUF_SPLICE_NONBLOCK) ++ splice_flags |= SPLICE_F_NONBLOCK; ++ ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ srcpos_val = src->pos + src_off; ++ srcpos = &srcpos_val; ++ } ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ dstpos_val = dst->pos + dst_off; ++ dstpos = &dstpos_val; ++ } ++ ++ while (len) { ++ res = splice(src->fd, srcpos, dst->fd, dstpos, len, ++ splice_flags); ++ if (res == -1) { ++ if (copied) ++ break; ++ ++ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) ++ return -errno; ++ ++ /* Maybe splice is not supported for this combination */ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, ++ len); ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY) && ++ !(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ len -= res; ++ } ++ ++ return copied; ++} ++#else ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ (void) flags; ++ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++} ++#endif ++ ++ ++static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) ++ memcpy(dstmem, srcmem, len); ++ else ++ memmove(dstmem, srcmem, len); ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else if (flags & FUSE_BUF_NO_SPLICE) { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ } ++} ++ ++static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) ++{ ++ if (bufv->idx < bufv->count) ++ return &bufv->buf[bufv->idx]; ++ else ++ return NULL; ++} ++ ++static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) ++{ ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) ++ return 0; ++ bufv->off = 0; ++ } ++ return 1; ++} ++ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, ++ enum fuse_buf_copy_flags flags) ++{ ++ size_t copied = 0; ++ ++ if (dstv == srcv) ++ return fuse_buf_size(dstv); ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) ++ break; ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) 
|| ++ !fuse_bufvec_advance(dstv, res)) ++ break; ++ ++ if (res < len) ++ break; ++ } ++ ++ return copied; ++} +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +new file mode 100644 +index 0000000..0d268ab +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.c +@@ -0,0 +1,40 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ Logging API. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "fuse_log.h" ++ ++#include ++#include ++ ++static void default_log_func( ++ __attribute__(( unused )) enum fuse_log_level level, ++ const char *fmt, va_list ap) ++{ ++ vfprintf(stderr, fmt, ap); ++} ++ ++static fuse_log_func_t log_func = default_log_func; ++ ++void fuse_set_log_func(fuse_log_func_t func) ++{ ++ if (!func) ++ func = default_log_func; ++ ++ log_func = func; ++} ++ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...) ++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); ++} +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +new file mode 100644 +index 0000000..93066b9 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.c +@@ -0,0 +1,423 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of option parsing routines (dealing with `struct ++ fuse_args`). ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++ ++#include ++#include ++#include ++#include ++ ++struct fuse_opt_context { ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; ++}; ++ ++void fuse_opt_free_args(struct fuse_args *args) ++{ ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) ++ free(args->argv[i]); ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } ++} ++ ++static int alloc_failed(void) ++{ ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++} ++ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg) ++{ ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) ++ return alloc_failed(); ++ ++ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; ++} ++ ++static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, ++ const char *arg) ++{ ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) ++ return -1; ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; ++} ++ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) ++{ ++ return fuse_opt_insert_arg_common(args, pos, arg); ++} ++ ++static int next_arg(struct fuse_opt_context *ctx, const char *opt) ++{ ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; ++} ++ ++static int add_arg(struct fuse_opt_context *ctx, const char *arg) ++{ ++ return fuse_opt_add_arg(&ctx->outargs, arg); ++} ++ ++static int add_opt_common(char **opts, const char *opt, int esc) ++{ ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) ++ return alloc_failed(); ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) ++ *d++ = '\\'; ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; ++} ++ ++int fuse_opt_add_opt(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 0); ++} ++ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 1); ++} ++ ++static int add_opt(struct fuse_opt_context *ctx, const char *opt) ++{ ++ return add_opt_common(&ctx->opts, opt, 1); ++} ++ ++static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, ++ int iso) ++{ ++ if (key == FUSE_OPT_KEY_DISCARD) ++ return 0; ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) ++ return res; ++ } ++ if (iso) ++ return add_opt(ctx, arg); ++ else ++ return add_arg(ctx, arg); ++} ++ ++static int match_template(const char *t, const char *arg, unsigned *sepp) ++{ ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? 
sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') ++ tlen ++; ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; ++} ++ ++static const struct fuse_opt *find_opt(const struct fuse_opt *opt, ++ const char *arg, unsigned *sepp) ++{ ++ for (; opt && opt->templ; opt++) ++ if (match_template(opt->templ, arg, sepp)) ++ return opt; ++ return NULL; ++} ++ ++int fuse_opt_match(const struct fuse_opt *opts, const char *opt) ++{ ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 1 : 0; ++} ++ ++static int process_opt_param(void *var, const char *format, const char *param, ++ const char *arg) ++{ ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) ++ return alloc_failed(); ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++static int process_opt(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) ++ return -1; ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') ++ param ++; ++ if (process_opt_param(var, opt->templ + sep + 1, ++ param, arg) == -1) ++ return -1; ++ } else ++ *(int *)var = opt->value; ++ } ++ return 0; ++} ++ ++static int process_opt_sep_arg(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) ++ return -1; ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 
1); ++ if (!newarg) ++ return alloc_failed(); ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; ++} ++ ++static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) ++{ ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) ++ res = process_opt_sep_arg(ctx, opt, sep, arg, ++ iso); ++ else ++ res = process_opt(ctx, opt, sep, arg, iso); ++ if (res == -1) ++ return -1; ++ } ++ return 0; ++ } else ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++} ++ ++static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) ++{ ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') ++ end = 1; ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) ++ return -1; ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && ++ s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + ++ (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; ++} ++ ++static int process_option_group(struct fuse_opt_context *ctx, const char *opts) ++{ ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; ++} ++ ++static int process_one(struct fuse_opt_context *ctx, const char *arg) ++{ ++ if (ctx->nonopt || arg[0] != '-') ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ else if (arg[1] == 'o') { ++ if (arg[2]) ++ return process_option_group(ctx, arg + 2); ++ else { ++ if 
(next_arg(ctx, arg) == -1) ++ return -1; ++ ++ return process_option_group(ctx, ++ ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) ++ return -1; ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else ++ return process_gopt(ctx, arg, 0); ++} ++ ++static int opt_parse(struct fuse_opt_context *ctx) ++{ ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) ++ return -1; ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) ++ return -1; ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) ++ return -1; ++ } ++ ++ /* If option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; ++} ++ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) ++{ ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) ++ return 0; ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; ++} +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +new file mode 100644 +index 0000000..4271947 +--- /dev/null ++++ b/tools/virtiofsd/fuse_signals.c +@@ -0,0 +1,91 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Utility functions for setting signal handlers. 
++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_lowlevel.h" ++#include "fuse_i.h" ++ ++#include ++#include ++#include ++#include ++ ++static struct fuse_session *fuse_instance; ++ ++static void exit_handler(int sig) ++{ ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if(sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } ++} ++ ++static void do_nothing(int sig) ++{ ++ (void) sig; ++} ++ ++static int set_one_signal_handler(int sig, void (*handler)(int), int remove) ++{ ++ struct sigaction sa; ++ struct sigaction old_sa; ++ ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; ++ ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } ++ ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; ++} ++ ++int fuse_set_signal_handlers(struct fuse_session *se) ++{ ++ /* If we used SIG_IGN instead of the do_nothing function, ++ then we would be unable to tell if we set SIG_IGN (and ++ thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ or if it was already set to SIG_IGN (and should be left ++ untouched. 
*/ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) ++ return -1; ++ ++ fuse_instance = se; ++ return 0; ++} ++ ++void fuse_remove_signal_handlers(struct fuse_session *se) ++{ ++ if (fuse_instance != se) ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ else ++ fuse_instance = NULL; ++ ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); ++} +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +new file mode 100644 +index 0000000..64ff7ad +--- /dev/null ++++ b/tools/virtiofsd/helper.c +@@ -0,0 +1,440 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Helper functions to create (simple) standalone programs. With the ++ aid of these functions it should be possible to create full FUSE ++ file system by implementing nothing but the request handlers. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_lowlevel.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define FUSE_HELPER_OPT(t, p) \ ++ { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++ ++static const struct fuse_opt fuse_helper_opts[] = { ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("-s", singlethread), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++#ifndef __FreeBSD__ ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), ++#endif ++ FUSE_HELPER_OPT("clone_fd", clone_fd), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END ++}; ++ ++struct fuse_conn_info_opts { ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; ++}; ++ ++#define CONN_OPTION(t, p, v) \ ++ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } ++static const struct fuse_opt conn_info_opt_spec[] = { ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END ++}; ++ ++ ++void fuse_cmdline_help(void) 
++{ ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -s disable multi-threaded operation\n" ++ " -o clone_fd use separate fuse device fd for each thread\n" ++ " (may improve performance)\n" ++ " -o max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); ++} ++ ++static int fuse_helper_opt_proc(void *data, const char *arg, int key, ++ struct fuse_args *outargs) ++{ ++ (void) outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: bad mount point `%s': %s\n", ++ arg, strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } ++} ++ ++/* Under FreeBSD, there is no subtype option so this ++ function actually sets the fsname */ ++static int add_default_subtype(const char *progname, struct fuse_args *args) ++{ ++ int res; ++ char *subtype_opt; ++ ++ const char *basename = strrchr(progname, '/'); ++ if (basename == NULL) ++ basename = progname; ++ else if (basename[1] != '\0') ++ basename++; ++ ++ subtype_opt = (char *) malloc(strlen(basename) + 64); ++ if (subtype_opt == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++#ifdef __FreeBSD__ ++ sprintf(subtype_opt, "-ofsname=%s", basename); ++#else ++ sprintf(subtype_opt, "-osubtype=%s", basename); ++#endif ++ res = fuse_opt_add_arg(args, subtype_opt); ++ free(subtype_opt); ++ return res; ++} ++ ++int fuse_parse_cmdline(struct fuse_args *args, ++ 
struct fuse_cmdline_opts *opts) ++{ ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ ++ opts->max_idle_threads = 10; ++ ++ if (fuse_opt_parse(args, opts, fuse_helper_opts, ++ fuse_helper_opt_proc) == -1) ++ return -1; ++ ++ /* *Linux*: if neither -o subtype nor -o fsname are specified, ++ set subtype to program's basename. ++ *FreeBSD*: if fsname is not specified, set to program's ++ basename. */ ++ if (!opts->nodefault_subtype) ++ if (add_default_subtype(args->argv[0], args) == -1) ++ return -1; ++ ++ return 0; ++} ++ ++ ++int fuse_daemonize(int foreground) ++{ ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. ++ */ ++ switch(fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void) read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void) chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void) dup2(nullfd, 0); ++ (void) dup2(nullfd, 1); ++ (void) dup2(nullfd, 2); ++ if (nullfd > 2) ++ close(nullfd); ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void) write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void) chdir("/"); ++ } ++ return 0; ++} ++ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *user_data) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse *fuse; ++ struct fuse_cmdline_opts opts; ++ int res; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ ++ if (opts.show_version) { ++ printf("FUSE library version %s\n", PACKAGE_VERSION); ++ 
fuse_lowlevel_version(); ++ res = 0; ++ goto out1; ++ } ++ ++ if (opts.show_help) { ++ if(args.argv[0][0] != '\0') ++ printf("usage: %s [options] \n\n", ++ args.argv[0]); ++ printf("FUSE options:\n"); ++ fuse_cmdline_help(); ++ fuse_lib_help(&args); ++ res = 0; ++ goto out1; ++ } ++ ++ if (!opts.show_help && ++ !opts.mountpoint) { ++ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); ++ res = 2; ++ goto out1; ++ } ++ ++ ++ fuse = fuse_new_31(&args, op, op_size, user_data); ++ if (fuse == NULL) { ++ res = 3; ++ goto out1; ++ } ++ ++ if (fuse_mount(fuse,opts.mountpoint) != 0) { ++ res = 4; ++ goto out2; ++ } ++ ++ if (fuse_daemonize(opts.foreground) != 0) { ++ res = 5; ++ goto out3; ++ } ++ ++ struct fuse_session *se = fuse_get_session(fuse); ++ if (fuse_set_signal_handlers(se) != 0) { ++ res = 6; ++ goto out3; ++ } ++ ++ if (opts.singlethread) ++ res = fuse_loop(fuse); ++ else { ++ struct fuse_loop_config loop_config; ++ loop_config.clone_fd = opts.clone_fd; ++ loop_config.max_idle_threads = opts.max_idle_threads; ++ res = fuse_loop_mt_32(fuse, &loop_config); ++ } ++ if (res) ++ res = 7; ++ ++ fuse_remove_signal_handlers(se); ++out3: ++ fuse_unmount(fuse); ++out2: ++ fuse_destroy(fuse); ++out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ return res; ++} ++ ++ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn) ++{ ++ if(opts->set_max_write) ++ conn->max_write = opts->max_write; ++ if(opts->set_max_background) ++ conn->max_background = opts->max_background; ++ if(opts->set_congestion_threshold) ++ conn->congestion_threshold = opts->congestion_threshold; ++ if(opts->set_time_gran) ++ conn->time_gran = opts->time_gran; ++ if(opts->set_max_readahead) ++ conn->max_readahead = opts->max_readahead; ++ ++#define LL_ENABLE(cond,cap) \ ++ if (cond) conn->want |= (cap) ++#define LL_DISABLE(cond,cap) \ ++ if (cond) conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++} ++ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++{ ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if(opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; ++} ++ ++int fuse_open_channel(const char *mountpoint, const char* options) ++{ ++ struct mount_opts *opts = NULL; ++ int fd = -1; ++ const char *argv[] = { "", "-o", options }; ++ int argc = sizeof(argv) / sizeof(argv[0]); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); ++ ++ opts = parse_mount_opts(&args); ++ if (opts == NULL) ++ return -1; ++ ++ fd = fuse_kern_mount(mountpoint, opts); ++ destroy_mount_opts(opts); ++ ++ return fd; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch new file mode 100644 index 0000000..1318fef --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch @@ -0,0 +1,3172 @@ +From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:42 +0100 +Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-8-dgilbert@redhat.com> +Patchwork-id: 93456 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +fuse_lowlevel is one of the largest files from the library +and does most of the work. Add it separately to keep the diff +sizes small. +Again this is from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 3129 insertions(+) + create mode 100644 tools/virtiofsd/fuse_lowlevel.c + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +new file mode 100644 +index 0000000..f2d7038 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -0,0 +1,3129 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of (most of) the low-level FUSE API. The session loop ++ functions are implemented in separate files. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_kernel.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef F_LINUX_SPECIFIC_BASE ++#define F_LINUX_SPECIFIC_BASE 1024 ++#endif ++#ifndef F_SETPIPE_SZ ++#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) ++#endif ++ ++ ++#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) ++#define OFFSET_MAX 0x7fffffffffffffffLL ++ ++#define container_of(ptr, type, member) ({ \ ++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ++ (type *)( (char *)__mptr - offsetof(type,member) );}) ++ ++struct fuse_pollhandle { ++ uint64_t kh; ++ struct fuse_session *se; ++}; ++ ++static size_t pagesize; ++ ++static __attribute__((constructor)) void fuse_ll_init_pagesize(void) ++{ ++ pagesize = getpagesize(); ++} ++ ++static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) ++{ ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); ++} ++ ++static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) ++{ ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++} ++ ++static size_t iov_length(const struct iovec *iov, size_t count) ++{ ++ size_t seg; ++ size_t ret = 0; ++ ++ for (seg = 0; seg < count; seg++) ++ ret += iov[seg].iov_len; ++ return ret; ++} ++ ++static void list_init_req(struct fuse_req *req) ++{ ++ req->next = req; ++ req->prev = req; ++} ++ ++static void list_del_req(struct fuse_req *req) ++{ ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_req(struct fuse_req *req, struct fuse_req *next) ++{ ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; ++} ++ ++static void destroy_req(fuse_req_t req) ++{ ++ pthread_mutex_destroy(&req->lock); ++ free(req); ++} ++ ++void fuse_free_req(fuse_req_t req) ++{ ++ int ctr; ++ struct fuse_session *se = req->se; ++ ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ fuse_chan_put(req->ch); ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) ++ destroy_req(req); ++} ++ ++static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) ++{ ++ struct fuse_req *req; ++ ++ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } ++ ++ return req; ++} ++ ++/* Send data. 
If *ch* is NULL, send via session master fd */ ++static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header *out = iov[0].iov_base; ++ ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", ++ out->error, out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long) out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ } ++ ++ ssize_t res = writev(ch ? ch->fd : se->fd, ++ iov, count); ++ int err = errno; ++ ++ if (res == -1) { ++ assert(se != NULL); ++ ++ /* ENOENT means the operation was interrupted */ ++ if (!fuse_session_exited(se) && err != ENOENT) ++ perror("fuse: writing device"); ++ return -err; ++ } ++ ++ return 0; ++} ++ ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ struct fuse_out_header out; ++ ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } ++ ++ out.unique = req->unique; ++ out.error = error; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(req->se, req->ch, iov, count); ++} ++ ++static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ int res; ++ ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; ++} ++ ++static int send_reply(fuse_req_t req, int error, const void *arg, ++ size_t argsize) ++{ ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *) arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); ++} ++ 
++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) ++{ ++ int res; ++ struct iovec *padded_iov; ++ ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; ++ ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); ++ ++ return res; ++} ++ ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ dirent = (struct fuse_dirent*) buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void convert_statfs(const struct statvfs *stbuf, ++ struct fuse_kstatfs *kstatfs) ++{ ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; ++} ++ ++static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) ++{ ++ return send_reply(req, 0, arg, argsize); ++} ++ ++int fuse_reply_err(fuse_req_t req, int err) ++{ ++ return send_reply(req, -err, NULL, 0); ++} ++ ++void fuse_reply_none(fuse_req_t req) ++{ ++ fuse_free_req(req); ++} ++ ++static unsigned long 
calc_timeout_sec(double t) ++{ ++ if (t > (double) ULONG_MAX) ++ return ULONG_MAX; ++ else if (t < 0.0) ++ return 0; ++ else ++ return (unsigned long) t; ++} ++ ++static unsigned int calc_timeout_nsec(double t) ++{ ++ double f = t - (double) calc_timeout_sec(t); ++ if (f < 0.0) ++ return 0; ++ else if (f >= 0.999999999) ++ return 999999999; ++ else ++ return (unsigned int) (f * 1.0e9); ++} ++ ++static void fill_entry(struct fuse_entry_out *arg, ++ const struct fuse_entry_param *e) ++{ ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); ++} ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, ++ const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) ++ arg->open_flags |= 
FOPEN_DIRECT_IO; ++ if (f->keep_cache) ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ if (f->cache_readdir) ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ if (f->nonseekable) ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++} ++ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) ++{ ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ ++ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ negative entry */ ++ if (!e->ino && req->se->conn.proto_minor < 4) ++ return fuse_reply_err(req, ENOENT); ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *f) ++{ ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, ++ entrysize + sizeof(struct fuse_open_out)); ++} ++ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout) ++{ ++ struct fuse_attr_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
++ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_readlink(fuse_req_t req, const char *linkname) ++{ ++ return send_reply_ok(req, linkname, strlen(linkname)); ++} ++ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) ++{ ++ struct fuse_open_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_write(fuse_req_t req, size_t count) ++{ ++ struct fuse_write_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) ++{ ++ return send_reply_ok(req, buf, size); ++} ++ ++static int fuse_send_data_iov_fallback(struct fuse_session *se, ++ struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, ++ size_t len) ++{ ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ int res; ++ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* FIXME: also avoid memory copy if there are multiple buffers ++ but none of them contain an fd */ ++ ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ return res; ++ ++ mem_buf.buf[0].mem = mbuf; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res < 0) { ++ free(mbuf); ++ return -res; ++ } ++ len = res; ++ ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ ++ return res; ++} ++ 
++struct fuse_ll_pipe { ++ size_t size; ++ int can_grow; ++ int pipe[2]; ++}; ++ ++static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) ++{ ++ close(llp->pipe[0]); ++ close(llp->pipe[1]); ++ free(llp); ++} ++ ++#ifdef HAVE_SPLICE ++#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) ++static int fuse_pipe(int fds[2]) ++{ ++ int rv = pipe(fds); ++ ++ if (rv == -1) ++ return rv; ++ ++ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || ++ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { ++ close(fds[0]); ++ close(fds[1]); ++ rv = -1; ++ } ++ return rv; ++} ++#else ++static int fuse_pipe(int fds[2]) ++{ ++ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); ++} ++#endif ++ ++static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp == NULL) { ++ int res; ++ ++ llp = malloc(sizeof(struct fuse_ll_pipe)); ++ if (llp == NULL) ++ return NULL; ++ ++ res = fuse_pipe(llp->pipe); ++ if (res == -1) { ++ free(llp); ++ return NULL; ++ } ++ ++ /* ++ *the default size is 16 pages on linux ++ */ ++ llp->size = pagesize * 16; ++ llp->can_grow = 1; ++ ++ pthread_setspecific(se->pipe_key, llp); ++ } ++ ++ return llp; ++} ++#endif ++ ++static void fuse_ll_clear_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp) { ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ } ++} ++ ++#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) ++static int read_back(int fd, char *buf, size_t len) ++{ ++ int res; ++ ++ res = read(fd, buf, len); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); ++ return -EIO; ++ } ++ if (res != len) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); ++ return -EIO; ++ } ++ return 0; ++} ++ ++static int 
grow_pipe_to_max(int pipefd) ++{ ++ int max; ++ int res; ++ int maxfd; ++ char buf[32]; ++ ++ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); ++ if (maxfd < 0) ++ return -errno; ++ ++ res = read(maxfd, buf, sizeof(buf) - 1); ++ if (res < 0) { ++ int saved_errno; ++ ++ saved_errno = errno; ++ close(maxfd); ++ return -saved_errno; ++ } ++ close(maxfd); ++ buf[res] = '\0'; ++ ++ max = atoi(buf); ++ res = fcntl(pipefd, F_SETPIPE_SZ, max); ++ if (res < 0) ++ return -errno; ++ return max; ++} ++ ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ int res; ++ size_t len = fuse_buf_size(buf); ++ struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_ll_pipe *llp; ++ int splice_flags; ++ size_t pipesize; ++ size_t total_fd_size; ++ size_t idx; ++ size_t headerlen; ++ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); ++ ++ if (se->broken_splice_nonblock) ++ goto fallback; ++ ++ if (flags & FUSE_BUF_NO_SPLICE) ++ goto fallback; ++ ++ total_fd_size = 0; ++ for (idx = buf->idx; idx < buf->count; idx++) { ++ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { ++ total_fd_size = buf->buf[idx].size; ++ if (idx == buf->idx) ++ total_fd_size -= buf->off; ++ } ++ } ++ if (total_fd_size < 2 * pagesize) ++ goto fallback; ++ ++ if (se->conn.proto_minor < 14 || ++ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ ++ headerlen = iov_length(iov, iov_count); ++ ++ out->len = headerlen + len; ++ ++ /* ++ * Heuristic for the required pipe size, does not work if the ++ * source contains less than page size fragments ++ */ ++ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; ++ ++ if (llp->size < pipesize) { ++ if (llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); ++ if (res == -1) { ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ 
llp->size = res; ++ llp->can_grow = 0; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < pipesize) ++ goto fallback; ++ } ++ ++ ++ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); ++ if (res == -1) ++ goto fallback; ++ ++ if (res != headerlen) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, ++ headerlen); ++ goto clear_pipe; ++ } ++ ++ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; ++ pipe_buf.buf[0].fd = llp->pipe[1]; ++ ++ res = fuse_buf_copy(&pipe_buf, buf, ++ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); ++ if (res < 0) { ++ if (res == -EAGAIN || res == -EINVAL) { ++ /* ++ * Should only get EAGAIN on kernels with ++ * broken SPLICE_F_NONBLOCK support (<= ++ * 2.6.35) where this error or a short read is ++ * returned even if the pipe itself is not ++ * full ++ * ++ * EINVAL might mean that splice can't handle ++ * this combination of input and output. ++ */ ++ if (res == -EAGAIN) ++ se->broken_splice_nonblock = 1; ++ ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ goto fallback; ++ } ++ res = -res; ++ goto clear_pipe; ++ } ++ ++ if (res != 0 && res < len) { ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ size_t now_len = res; ++ /* ++ * For regular files a short count is either ++ * 1) due to EOF, or ++ * 2) because of broken SPLICE_F_NONBLOCK (see above) ++ * ++ * For other inputs it's possible that we overflowed ++ * the pipe because of small buffer fragments. ++ */ ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ goto clear_pipe; ++ ++ mem_buf.buf[0].mem = mbuf; ++ mem_buf.off = now_len; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res > 0) { ++ char *tmpbuf; ++ size_t extra_len = res; ++ /* ++ * Trickiest case: got more data. Need to get ++ * back the data from the pipe and then fall ++ * back to regular write. 
++ */ ++ tmpbuf = malloc(headerlen); ++ if (tmpbuf == NULL) { ++ free(mbuf); ++ res = ENOMEM; ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], tmpbuf, headerlen); ++ free(tmpbuf); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], mbuf, now_len); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ len = now_len + extra_len; ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ return res; ++ } ++ free(mbuf); ++ res = now_len; ++ } ++ len = res; ++ out->len = headerlen + len; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i (splice)\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ ++ splice_flags = 0; ++ if ((flags & FUSE_BUF_SPLICE_MOVE) && ++ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) ++ splice_flags |= SPLICE_F_MOVE; ++ ++ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, ++ NULL, out->len, splice_flags); ++ if (res == -1) { ++ res = -errno; ++ perror("fuse: splice from pipe"); ++ goto clear_pipe; ++ } ++ if (res != out->len) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", ++ res, out->len); ++ goto clear_pipe; ++ } ++ return 0; ++ ++clear_pipe: ++ fuse_ll_clear_pipe(se); ++ return res; ++ ++fallback: ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#else ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ size_t len = fuse_buf_size(buf); ++ (void) flags; ++ ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#endif ++ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct 
fuse_out_header); ++ ++ out.unique = req->unique; ++ out.error = 0; ++ ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } ++} ++ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) ++{ ++ struct fuse_statfs_out arg; ++ size_t size = req->se->conn.proto_minor < 4 ? ++ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_xattr(fuse_req_t req, size_t count) ++{ ++ struct fuse_getxattr_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock) ++{ ++ struct fuse_lk_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) ++ arg.lk.end = OFFSET_MAX; ++ else ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx) ++{ ++ struct fuse_bmap_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) ++ return NULL; ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t) iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = 
NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); ++out: ++ free(in_fiov); ++ free(out_fiov); ++ ++ return res; ++ ++enomem: ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; ++} ++ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) ++{ ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (size) { ++ iov[count].iov_base = (char *) buf; ++ iov[count].iov_len = size; ++ count++; ++ } ++ ++ return send_reply_iov(req, 0, iov, count); ++} ++ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count) 
++{ ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; ++ ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); ++ ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); ++ ++ return res; ++} ++ ++int fuse_reply_poll(fuse_req_t req, unsigned revents) ++{ ++ struct fuse_poll_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lseek(fuse_req_t req, off_t off) ++{ ++ struct fuse_lseek_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.lookup) ++ req->se->op.lookup(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ ++ if (req->se->op.forget) ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ else ++ fuse_reply_none(req); ++} ++ ++static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg) ++{ ++ struct fuse_batch_forget_in *arg = (void *) inarg; ++ struct fuse_forget_one *param = (void *) PARAM(arg); ++ unsigned int i; ++ ++ (void) nodeid; ++ ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *) param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = &param[i]; ++ struct fuse_req *dummy_req; ++ ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req ==
NULL) ++ break; ++ ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; ++ ++ req->se->op.forget(dummy_req, forget->nodeid, ++ forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } ++} ++ ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; ++ ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } ++ ++ if (req->se->op.getattr) ++ req->se->op.getattr(req, nodeid, fip); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= ++ FUSE_SET_ATTR_MODE | ++ FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | ++ FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | ++ FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | ++ FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ ++ if (req->se->op.access) ++ req->se->op.access(req, nodeid, arg->mask); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) inarg; ++ ++ if 
(req->se->op.readlink) ++ req->se->op.readlink(req, nodeid); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; ++ char *name = PARAM(arg); ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ ++ if (req->se->op.mknod) ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ ++ if (req->se->op.mkdir) ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.unlink) ++ req->se->op.unlink(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.rmdir) ++ req->se->op.rmdir(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ ++ if (req->se->op.symlink) ++ req->se->op.symlink(req, linkname, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, ++ 0); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ ++ if (req->se->op.link) ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + sizeof(struct fuse_open_in); ++ ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.open) ++ req->se->op.open(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ ++ if (req->se->op.read) { ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ char *param; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } ++ ++ if (req->se->op.write) ++ req->se->op.write(req, nodeid, param, arg->size, ++ arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ ++out: ++ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) ++ fi.lock_owner = arg->lock_owner; ++ ++ if (req->se->op.flush) ++ req->se->op.flush(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } ++ ++ if (req->se->op.release) ++ req->se->op.release(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsync) ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.opendir) ++ req->se->op.opendir(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdir) ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdirplus) ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ ++ if (req->se->op.releasedir) ++ req->se->op.releasedir(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsyncdir) ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) nodeid; ++ (void) inarg; ++ ++ if (req->se->op.statfs) ++ req->se->op.statfs(req, nodeid); ++ else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } ++} ++ ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; ++ ++ if (req->se->op.setxattr) ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.getxattr) ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.listxattr) ++ req->se->op.listxattr(req, nodeid, arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.removexattr) ++ req->se->op.removexattr(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void convert_fuse_file_lock(struct fuse_file_lock *fl, ++ struct flock *flock) ++{ ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) ++ flock->l_len = 0; ++ else ++ flock->l_len = fl->end - fl->start + 1; ++ flock->l_pid = fl->pid; ++} ++ ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ 
struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) ++ op |= LOCK_NB; ++ ++ if (req->se->op.flock) ++ req->se->op.flock(req, nodeid, &fi, op); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 0); ++} ++ ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 1); ++} ++ ++static int find_interrupted(struct fuse_session *se, struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) ++ func(curr, data); ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) ++ destroy_req(curr); ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) ++ return 1; ++ } ++ return 0; ++} ++ ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; ++ struct 
fuse_session *se = req->se; ++ ++ (void) nodeid; ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long) arg->unique); ++ ++ req->u.i.unique = arg->unique; ++ ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) ++ destroy_req(req); ++ else ++ list_add_req(req, &se->interrupts); ++ pthread_mutex_unlock(&se->lock); ++} ++ ++static struct fuse_req *check_interrupt(struct fuse_session *se, ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else ++ return NULL; ++} ++ ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ ++ if (req->se->op.bmap) ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; ++ ++ if (flags & FUSE_IOCTL_DIR && ++ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } ++ ++ if (req->se->op.ioctl) ++ req->se->op.ioctl(req, nodeid, arg->cmd, ++ (void *)(uintptr_t)arg->arg, &fi, flags, ++ in_buf, arg->in_size, arg->out_size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) ++{ ++ free(ph); ++} ++ ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; ++ ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; ++ ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } ++ ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fallocate) ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++{ ++ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; ++ struct fuse_file_info fi_in, fi_out; ++ ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; ++ ++ 
memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; ++ ++ ++ if (req->se->op.copy_file_range) ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, ++ &fi_in, arg->nodeid_out, ++ arg->off_out, &fi_out, arg->len, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.lseek) ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void) nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) ++ se->conn.max_readahead = arg->max_readahead; ++ if (arg->flags & FUSE_ASYNC_READ) ++ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ if (arg->flags & FUSE_POSIX_LOCKS) ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ if (arg->flags & FUSE_EXPORT_SUPPORT) ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ if (arg->flags & FUSE_DONT_MASK) ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ if (arg->flags & FUSE_FLOCK_LOCKS) ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ if (arg->flags & FUSE_DO_READDIRPLUS) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ if (arg->flags & FUSE_ASYNC_DIO) ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ if (arg->flags & FUSE_WRITEBACK_CACHE) ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ if (arg->flags & FUSE_PARALLEL_DIROPS) ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ if (arg->flags & FUSE_POSIX_ACL) ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() ++ + FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { ++#ifdef HAVE_SPLICE ++#ifdef HAVE_VMSPLICE ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++#endif ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; ++#endif ++ } ++ if (se->conn.proto_minor >= 18) ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ ++ /* Default settings for modern filesystems. 
++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, ++ FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ ++ se->got_init = 1; ++ if (se->op.init) ++ se->op.init(se->userdata, &se->conn); ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ unsigned max_read_mo = get_max_read(se->mo); ++ if (se->conn.max_read != max_read_mo) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " ++ "requested different maximum read size (%u vs %u)\n", ++ se->conn.max_read, 
max_read_mo); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* Always enable big writes, this is superseded ++ by the max_write option */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) ++ outarg.flags |= FUSE_ASYNC_READ; ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ if (se->conn.want & FUSE_CAP_DONT_MASK) ++ outarg.flags |= FUSE_DONT_MASK; ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) ++ outarg.flags |= FUSE_ASYNC_DIO; ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) ++ outarg.flags |= FUSE_POSIX_ACL; ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) ++ se->conn.max_background = (1 << 16) - 1; ++ if (se->conn.congestion_threshold > se->conn.max_background) ++ se->conn.congestion_threshold = se->conn.max_background; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = ++ se->conn.max_background * 3 / 4; ++ } ++ ++ 
outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) ++ outarg.time_gran = se->conn.time_gran; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", ++ outarg.time_gran); ++ } ++ if (arg->minor < 5) ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ else if (arg->minor < 23) ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ ++ send_reply_ok(req, &outarg, outargsize); ++} ++ ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_session *se = req->se; ++ ++ (void) nodeid; ++ (void) inarg; ++ ++ se->got_destroy = 1; ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ ++ send_reply_ok(req, NULL, 0); ++} ++ ++static void list_del_nreq(struct fuse_notify_req *nreq) ++{ ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_nreq(struct fuse_notify_req *nreq, ++ struct fuse_notify_req *next) ++{ ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; ++} ++ ++static void list_init_nreq(struct fuse_notify_req *nreq) ++{ ++ nreq->next = nreq; ++ nreq->prev = nreq; ++} ++ ++static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, const struct fuse_buf *buf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req 
*head; ++ ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); ++ ++ if (nreq != head) ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++} ++ ++static int send_notify_iov(struct fuse_session *se, int notify_code, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header out; ++ ++ if (!se->got_init) ++ return -ENOTCONN; ++ ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(se, NULL, iov, count); ++} ++ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) ++{ ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; ++ ++ outarg.kh = ph->kh; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } ++} ++ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len) ++{ ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++} ++ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ 
iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++} ++ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++} ++ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; ++ ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) ++ res = -res; ++ ++ return res; ++} ++ ++struct fuse_retrieve_req { ++ struct fuse_notify_req nreq; ++ void *cookie; ++}; ++ ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, ++ fuse_req_t req, fuse_ino_t ino, ++ const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct 
fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, ++ arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } ++out: ++ free(rreq); ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie) ++{ ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) ++ return -ENOMEM; ++ ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); ++ ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } ++ ++ return err; ++} ++ ++void *fuse_req_userdata(fuse_req_t req) ++{ ++ return req->se->userdata; ++} ++ ++const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) ++{ ++ return &req->ctx; ++} ++ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data) ++{ ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) ++ func(req, data); ++ pthread_mutex_unlock(&req->lock); ++} ++ ++int fuse_req_interrupted(fuse_req_t req) ++{ ++ int interrupted; ++ ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); ++ ++ return interrupted; ++} ++ ++static struct { ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; ++} fuse_ll_ops[] = { ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ [FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, ++}; ++ ++#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) ++ ++static const char *opname(enum fuse_opcode opcode) ++{ ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) ++ return "???"; ++ else ++ return fuse_ll_ops[opcode].name; ++} ++ ++static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, ++ struct fuse_bufvec *src) ++{ ++ ssize_t res = fuse_buf_copy(dst, src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); ++ return res; ++ } ++ if ((size_t)res < fuse_buf_size(dst)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf) ++{ ++ fuse_session_process_buf_int(se, buf, NULL); ++} ++ ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch) ++{ ++ const size_t write_header_size = sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; ++ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ void *mbuf = NULL; ++ int err; ++ int res; ++ ++ if (buf->flags & FUSE_BUF_IS_FD) { ++ if (buf->size < tmpbuf.buf[0].size) ++ tmpbuf.buf[0].size = buf->size; ++ ++ mbuf = malloc(tmpbuf.buf[0].size); ++ if (mbuf == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); ++ goto clear_pipe; ++ } ++ tmpbuf.buf[0].mem = mbuf; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ if (res < 0) ++ goto clear_pipe; ++ ++ in = mbuf; ++ } else { ++ in = buf->mem; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long) in->unique, ++ opname((enum fuse_opcode) in->opcode), in->opcode, ++ (unsigned long long) in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, &iov, 1); ++ goto clear_pipe; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch ? fuse_chan_get(ch) : NULL; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) ++ goto reply_err; ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) ++ goto reply_err; ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && ++ in->opcode != FUSE_READDIRPLUS) ++ goto reply_err; ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) ++ goto reply_err; ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) ++ fuse_reply_err(intr, EAGAIN); ++ } ++ ++ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && ++ (in->opcode != FUSE_WRITE || !se->op.write_buf) && ++ in->opcode != FUSE_NOTIFY_REPLY) { ++ void *newmbuf; ++ ++ err = ENOMEM; ++ newmbuf = realloc(mbuf, buf->size); ++ if (newmbuf == NULL) ++ goto reply_err; ++ mbuf = newmbuf; ++ ++ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); ++ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ err = -res; ++ if (res < 0) ++ goto reply_err; ++ ++ in = mbuf; ++ } ++ ++ inarg = (void *) &in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) ++ do_write_buf(req, in->nodeid, inarg, buf); ++ else if (in->opcode == FUSE_NOTIFY_REPLY) ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ else ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ ++out_free: ++ free(mbuf); ++ return; ++ ++reply_err: ++ fuse_reply_err(req, err); ++clear_pipe: ++ if (buf->flags & FUSE_BUF_IS_FD) ++ fuse_ll_clear_pipe(se); ++ goto out_free; 
++} ++ ++#define LL_OPTION(n,o,v) \ ++ { n, offsetof(struct fuse_session, o), v } ++ ++static const struct fuse_opt fuse_ll_opts[] = { ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END ++}; ++ ++void fuse_lowlevel_version(void) ++{ ++ printf("using FUSE kernel interface version %i.%i\n", ++ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ fuse_mount_version(); ++} ++ ++void fuse_lowlevel_help(void) ++{ ++ /* These are not all options, but the ones that are ++ potentially of interest to an end-user */ ++ printf( ++" -o allow_other allow access by all users\n" ++" -o allow_root allow access by root\n" ++" -o auto_unmount auto unmount on process termination\n"); ++} ++ ++void fuse_session_destroy(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp; ++ ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ } ++ llp = pthread_getspecific(se->pipe_key); ++ if (llp != NULL) ++ fuse_ll_pipe_free(llp); ++ pthread_key_delete(se->pipe_key); ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) ++ close(se->fd); ++ destroy_mount_opts(se->mo); ++ free(se); ++} ++ ++ ++static void fuse_ll_pipe_destructor(void *data) ++{ ++ struct fuse_ll_pipe *llp = data; ++ fuse_ll_pipe_free(llp); ++} ++ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) ++{ ++ return fuse_session_receive_buf_int(se, buf, NULL); ++} ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ int err; ++ ssize_t res; ++#ifdef HAVE_SPLICE ++ size_t bufsize = se->bufsize; ++ struct fuse_ll_pipe *llp; ++ struct fuse_buf tmpbuf; ++ ++ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ if (llp->size < bufsize) { ++ if 
(llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); ++ if (res == -1) { ++ llp->can_grow = 0; ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ llp->size = res; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < bufsize) ++ goto fallback; ++ } ++ ++ res = splice(ch ? ch->fd : se->fd, ++ NULL, llp->pipe[1], NULL, bufsize, 0); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ ++ if (res == -1) { ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: splice from device"); ++ return -err; ++ } ++ ++ if (res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); ++ return -EIO; ++ } ++ ++ tmpbuf = (struct fuse_buf) { ++ .size = res, ++ .flags = FUSE_BUF_IS_FD, ++ .fd = llp->pipe[0], ++ }; ++ ++ /* ++ * Don't bother with zero copy for small requests. ++ * fuse_loop_mt() needs to check for FORGET so this more than ++ * just an optimization. 
++ */ ++ if (res < sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in) + pagesize) { ++ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; ++ struct fuse_bufvec dst = { .count = 1 }; ++ ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ buf->size = se->bufsize; ++ buf->flags = 0; ++ dst.buf[0] = *buf; ++ ++ res = fuse_buf_copy(&dst, &src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", ++ strerror(-res)); ++ fuse_ll_clear_pipe(se); ++ return res; ++ } ++ if (res < tmpbuf.size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ fuse_ll_clear_pipe(se); ++ return -EIO; ++ } ++ assert(res == tmpbuf.size); ++ ++ } else { ++ /* Don't overwrite buf->mem, as that would cause a leak */ ++ buf->fd = tmpbuf.fd; ++ buf->flags = tmpbuf.flags; ++ } ++ buf->size = tmpbuf.size; ++ ++ return res; ++ ++fallback: ++#endif ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ ++restart: ++ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ if (res == -1) { ++ /* ENOENT means the operation was interrupted, it's safe ++ to restart */ ++ if (err == ENOENT) ++ goto restart; ++ ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ /* Errors occurring during normal operation: EINTR (read ++ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem ++ umounted) */ ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: reading device"); ++ return -err; ++ } ++ if ((size_t) res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); ++ return -EIO; ++ } ++ ++ buf->size = res; ++ ++ return res; ++} ++ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ int err; ++ struct fuse_session *se; ++ struct mount_opts *mo; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) ++ goto out2; ++ if(se->deny_others) { ++ /* Allowing access only by root is done by instructing ++ * kernel to allow access by everyone, and then restricting ++ * access to root and mountpoint owner in libfuse. 
++ */ ++ // We may be adding the option a second time, but ++ // that doesn't hurt. ++ if(fuse_opt_add_arg(args, "-oallow_other") == -1) ++ goto out2; ++ } ++ mo = parse_mount_opts(args); ++ if (mo == NULL) ++ goto out3; ++ ++ if(args->argc == 1 && ++ args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if (args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for(i = 1; i < args->argc-1; i++) ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); ++ if (err) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", ++ strerror(err)); ++ goto out5; ++ } ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ se->mo = mo; ++ return se; ++ ++out5: ++ pthread_mutex_destroy(&se->lock); ++out4: ++ fuse_opt_free_args(args); ++out3: ++ free(mo); ++out2: ++ free(se); ++out1: ++ return NULL; ++} ++ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++{ ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. ++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) ++ close(fd); ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) ++ return -1; ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) ++ goto error_out; ++ ++ return 0; ++ ++error_out: ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; ++} ++ ++int fuse_session_fd(struct fuse_session *se) ++{ ++ return se->fd; ++} ++ ++void fuse_session_unmount(struct fuse_session *se) ++{ ++ if (se->mountpoint != NULL) { ++ fuse_kern_unmount(se->mountpoint, se->fd); ++ free(se->mountpoint); ++ se->mountpoint = NULL; ++ } ++} ++ ++#ifdef linux ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; ++ ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ ++retry: ++ buf = malloc(bufsize); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) ++ goto out_free; ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) ++ goto out_free; ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) ++ break; ++ ++ s = end; ++ if (ret < size) ++ list[ret] = val; ++ ret++; ++ } ++ ++out_free: ++ free(buf); ++ return ret; ++} ++#else /* linux */ ++/* ++ * This is currently not implemented on other than Linux... 
++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ (void) req; (void) size; (void) list; ++ return -ENOSYS; ++} ++#endif ++ ++void fuse_session_exit(struct fuse_session *se) ++{ ++ se->exited = 1; ++} ++ ++void fuse_session_reset(struct fuse_session *se) ++{ ++ se->exited = 0; ++ se->error = 0; ++} ++ ++int fuse_session_exited(struct fuse_session *se) ++{ ++ return se->exited; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch b/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch new file mode 100644 index 0000000..c0ba96a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch @@ -0,0 +1,105 @@ +From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:58 +0100 +Subject: [PATCH 027/116] virtiofsd: Add main virtio loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-24-dgilbert@redhat.com> +Patchwork-id: 93475 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Processes incoming requests on the vhost-user fd. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 39 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2ae3c76..1928a20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,12 +11,14 @@ + * See the file COPYING.LIB + */ + ++#include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_virtio.h" + ++#include ++#include + #include + #include + #include +@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { + .queue_is_processed_in_order = fv_queue_order, + }; + ++/* ++ * Main loop; this mostly deals with events on the vhost-user ++ * socket itself, and not actual fuse data. ++ */ + int virtio_loop(struct fuse_session *se) + { + fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); + +- while (1) { +- /* TODO: Add stuffing */ ++ while (!fuse_session_exited(se)) { ++ struct pollfd pf[1]; ++ pf[0].fd = se->vu_socketfd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, ++ pf[0].revents); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); ++ if (!vu_dispatch(&se->virtio_dev->dev)) { ++ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); ++ break; ++ } + } + + 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++ ++ return 0; + } + + int virtio_session_mount(struct fuse_session *se) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch b/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch new file mode 100644 index 0000000..8ac7fa7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch @@ -0,0 +1,103 @@ +From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:54 +0100 +Subject: [PATCH 023/116] virtiofsd: Add options for virtio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-20-dgilbert@redhat.com> +Patchwork-id: 93473 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add options to specify parameters for virtio-fs paths, i.e. + + ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- + tools/virtiofsd/helper.c | 14 +++++++------- + 3 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index bae0699..26b1a7d 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -63,6 +63,7 @@ struct fuse_session { + struct fuse_notify_req notify_list; + size_t bufsize; + int error; ++ char *vu_socket_path; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 8552cfb..17e8718 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2115,8 +2115,11 @@ reply_err: + } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("--socket-path=%s", vu_socket_path, 0), + FUSE_OPT_END + }; + +@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) + * These are not all options, but the ones that are + * potentially of interest to an end-user + */ +- printf(" -o allow_root allow access by root\n"); ++ printf( ++ " -o allow_root allow access by root\n" ++ " --socket-path=PATH path for the vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9333691..676032e 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { + + void fuse_cmdline_help(void) + { +- printf( +- " -h --help print help\n" +- " -V --version print version\n" 
+- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o max_idle_threads the maximum number of idle worker " ++ "threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch b/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch new file mode 100644 index 0000000..2510551 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch @@ -0,0 +1,1387 @@ +From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:43 +0100 +Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-9-dgilbert@redhat.com> +Patchwork-id: 93462 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +passthrough_ll is one of the examples in the upstream fuse project +and is the main part of our daemon here. It passes through requests +from fuse to the underlying filesystem, using syscalls as directly +as possible. + +>From libfuse fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert + Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent + with the 'LICENSE' file in libfuse; patch sent to libfuse to fix + it upstream. +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1338 insertions(+) + create mode 100644 tools/virtiofsd/passthrough_ll.c + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +new file mode 100644 +index 0000000..e1a6056 +--- /dev/null ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -0,0 +1,1338 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPLv2. ++ See the file COPYING. ++*/ ++ ++/** @file ++ * ++ * This file system mirrors the existing file system hierarchy of the ++ * system, starting at the root file system. This is implemented by ++ * just "passing through" all requests to the corresponding user-space ++ * libc functions. In contrast to passthrough.c and passthrough_fh.c, ++ * this implementation uses the low-level API. Its performance should ++ * be the least bad among the three, but many operations are not ++ * implemented. In particular, it is not possible to remove files (or ++ * directories) because the code necessary to defer actual removal ++ * until the file is not opened anymore would make the example much ++ * more complicated. ++ * ++ * When writeback caching is enabled (-o writeback mount option), it ++ * is only possible to write to files for which the mounting user has ++ * read permissions. This is because the writeback cache requires the ++ * kernel to be able to issue read requests for all files (which the ++ * passthrough filesystem cannot satisfy if it can't read the file in ++ * the underlying filesystem). 
++ * ++ * Compile with: ++ * ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * ++ * ## Source code ## ++ * \include passthrough_ll.c ++ */ ++ ++#define _GNU_SOURCE ++#define FUSE_USE_VERSION 31 ++ ++#include "config.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "passthrough_helpers.h" ++ ++/* We are re-using pointers to our `struct lo_inode` and `struct ++ lo_dirp` elements as inodes. This means that we must be able to ++ store uintptr_t values in a fuse_ino_t variable. The following ++ incantation checks this condition at compile time. */ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), ++ "fuse_ino_t too small to hold uintptr_t values!"); ++#else ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ ++ { unsigned _uintptr_to_must_hold_fuse_ino_t: ++ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; ++#endif ++ ++struct lo_inode { ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ ++}; ++ ++enum { ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, ++}; ++ ++struct lo_data { ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ struct lo_inode root; /* protected by lo->mutex */ ++}; ++ ++static const struct fuse_opt lo_opts[] = { ++ { "writeback", ++ offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", ++ offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", ++ offsetof(struct lo_data, source), 0 }, ++ { "flock", ++ offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", ++ offsetof(struct lo_data, flock), 0 }, ++ { "xattr", ++ offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", ++ offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", ++ offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", ++ offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", ++ offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", ++ offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", ++ offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END ++}; ++ ++static struct lo_data *lo_data(fuse_req_t req) ++{ ++ return (struct lo_data *) fuse_req_userdata(req); ++} ++ ++static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) ++{ ++ if (ino == FUSE_ROOT_ID) ++ return &lo_data(req)->root; ++ else ++ return (struct lo_inode *) (uintptr_t) ino; ++} ++ ++static int lo_fd(fuse_req_t req, fuse_ino_t ino) ++{ ++ return lo_inode(req, ino)->fd; ++} ++ ++static bool lo_debug(fuse_req_t req) ++{ ++ return lo_data(req)->debug != 0; ++} ++ ++static void lo_init(void *userdata, ++ struct fuse_conn_info *conn) ++{ ++ struct lo_data *lo = 
(struct lo_data*) userdata; ++ ++ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ ++ if (lo->writeback && ++ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } ++} ++ ++static void lo_getattr(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); ++ ++ (void) fi; ++ ++ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fuse_reply_attr(req, &buf, lo->timeout); ++} ++ ++static int utimensat_empty_nofollow(struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); ++} ++ ++static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int valid, struct fuse_file_info *fi) ++{ ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
++ attr->st_uid : (uid_t) -1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? ++ attr->st_gid : (gid_t) -1; ++ ++ res = fchownat(ifd, "", uid, gid, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) ++ tv[0].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_ATIME) ++ tv[0] = attr->st_atim; ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) ++ tv[1].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_MTIME) ++ tv[1] = attr->st_mtim; ++ ++ if (fi) ++ res = futimens(fi->fh, tv); ++ else ++ res = utimensat_empty_nofollow(inode, tv); ++ if (res == -1) ++ goto out_err; ++ } ++ ++ return lo_getattr(req, ino, fi); ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) ++{ ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; ++} ++ ++static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, ++ struct fuse_entry_param *e) ++{ ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); ++ if (newfd == -1) ++ goto out_err; ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, sizeof(struct lo_inode)); ++ if (!inode) ++ goto out_err; ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, (unsigned long long) e->ino); ++ ++ return 0; ++ ++out_err: ++ saverr = errno; ++ if (newfd != -1) ++ close(newfd); ++ return saverr; ++} ++ ++static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_entry(req, &e); ++} ++ ++static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) ++{ ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; ++ ++ saverr = ENOMEM; ++ ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); ++ ++ saverr = errno; ++ if (res == -1) ++ goto out; ++ ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) ++ goto out; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) 
parent, name, (unsigned long long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev) ++{ ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++} ++ ++static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++} ++ ++static void lo_symlink(fuse_req_t req, const char *link, ++ fuse_ino_t parent, const char *name) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++} ++ ++static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, ++ const char *name) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++} ++ ++static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) ++ goto out_err; ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, ++ (unsigned long 
long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) ++{ ++ int res; ++ ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ res = renameat(lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, 0); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++{ ++ if (!inode) ++ return; ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } ++} ++ ++static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long) ino, ++ (unsigned long long) inode->refcount, ++ (unsigned long long) nlookup); ++ } ++ ++ unref_inode(lo, inode, nlookup); ++} ++ ++static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); ++} ++ ++static void 
lo_forget_multi(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets) ++{ ++ int i; ++ ++ for (i = 0; i < count; i++) ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ fuse_reply_none(req); ++} ++ ++static void lo_readlink(fuse_req_t req, fuse_ino_t ino) ++{ ++ char buf[PATH_MAX + 1]; ++ int res; ++ ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ if (res == sizeof(buf)) ++ return (void) fuse_reply_err(req, ENAMETOOLONG); ++ ++ buf[res] = '\0'; ++ ++ fuse_reply_readlink(req, buf); ++} ++ ++struct lo_dirp { ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; ++}; ++ ++static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++{ ++ return (struct lo_dirp *) (uintptr_t) fi->fh; ++} ++ ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) ++ goto out_err; ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) ++ goto out_errno; ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) ++ goto out_errno; ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t) d; ++ if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++ return; ++ ++out_errno: ++ error = errno; ++out_err: ++ if (d) { ++ if (fd != -1) ++ close(fd); ++ free(d); ++ } ++ fuse_reply_err(req, error); ++} ++ ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && (name[1] == '\0' || ++ (name[1] == '.' 
&& name[2] == '\0')); ++} ++ ++static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi, int plus) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void) ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { // Error ++ err = errno; ++ goto error; ++ } else { // End of stream ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param) { ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) ++ goto error; ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, ++ &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, ++ &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) ++ lo_forget_one(req, entry_ino, 1); ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } ++ ++ err = 0; ++error: ++ // If there's an error, we can only signal it if we haven't stored ++ // any entries yet - otherwise we'd end up with wrong lookup ++ // counts for the entries that are already in the buffer. So we ++ // return what we've collected until that point. 
++ if (err && rem == size) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_buf(req, buf, size - rem); ++ free(buf); ++} ++ ++static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 0); ++} ++ ++static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 1); ++} ++ ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ (void) ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi) ++{ ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ fd = openat(lo_fd(req, parent), name, ++ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_create(req, &e, fi); ++} ++ ++static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fd); ++ else ++ res = fsync(fd); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ++ ino, fi->flags); ++ ++ /* With writeback cache, kernel may send read requests even ++ when userspace opened write-only */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* With writeback cache, O_APPEND is handled by the kernel. ++ This breaks atomicity (since the file may change in the ++ underlying filesystem, so that the kernel's idea of the ++ end of the file isn't accurate anymore). In this example, ++ we just accept that. A more rigorous filesystem may want ++ to return an error here */ ++ if (lo->writeback && (fi->flags & O_APPEND)) ++ fi->flags &= ~O_APPEND; ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++} ++ ++static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ++ close(fi->fh); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fi->fh); ++ else ++ res = fsync(fi->fh); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ino, size, (unsigned long) offset); ++ ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; ++ ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++} ++ ++static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ++ ino, out_buf.buf[0].size, (unsigned long) off); ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if(res < 0) ++ fuse_reply_err(req, -res); ++ else ++ fuse_reply_write(req, (size_t) res); ++} ++ ++static void lo_statfs(fuse_req_t req, fuse_ino_t ino) ++{ ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) ++ fuse_reply_err(req, errno); ++ else ++ fuse_reply_statfs(req, &stbuf); ++} ++ ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi) ++{ ++ int err = EOPNOTSUPP; ++ (void) ino; ++ ++#ifdef HAVE_FALLOCATE ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) ++ err = errno; ++ ++#elif defined(HAVE_POSIX_FALLOCATE) ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ err = posix_fallocate(fi->fh, offset, length); ++#endif ++ ++ fuse_reply_err(req, err); ++} ++ ++static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op) 
++{ ++ int res; ++ (void) ino; ++ ++ res = flock(fi->fh, op); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? 
errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++#ifdef HAVE_COPY_FILE_RANGE ++static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags) ++{ ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, ++ len, flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, ++ flags); ++ if (res < 0) ++ fuse_reply_err(req, -errno); ++ else ++ fuse_reply_write(req, res); ++} ++#endif ++ ++static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi) ++{ ++ off_t res; ++ ++ (void)ino; ++ res = lseek(fi->fh, off, whence); ++ if (res != -1) ++ fuse_reply_lseek(req, res); ++ else ++ fuse_reply_err(req, errno); ++} ++ ++static struct fuse_lowlevel_ops lo_oper = { ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, ++#ifdef HAVE_COPY_FILE_RANGE ++ .copy_file_range = 
lo_copy_file_range, ++#endif ++ .lseek = lo_lseek, ++}; ++ ++int main(int argc, char *argv[]) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, ++ .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ printf("FUSE library version %s\n", fuse_pkgversion()); ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if(opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) ++ return 1; ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", ++ lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", ++ lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) ++ goto err_out1; ++ ++ if (fuse_set_signal_handlers(se) != 0) ++ goto err_out2; ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) ++ goto err_out3; ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) ++ ret = fuse_session_loop(se); ++ else ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ ++ fuse_session_unmount(se); ++err_out3: ++ fuse_remove_signal_handlers(se); ++err_out2: ++ fuse_session_destroy(se); ++err_out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ ++ if (lo.root.fd >= 0) ++ close(lo.root.fd); ++ ++ return ret ? 1 : 0; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch new file mode 100644 index 0000000..cef537a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch @@ -0,0 +1,73 @@ +From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:39 +0100 +Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with + FUSE_LOG_DEBUG level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-65-dgilbert@redhat.com> +Patchwork-id: 93517 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can see the timestamp. 
+ +Add nano second timestamp, which got by get_clock(), to the log with +FUSE_LOG_DEBUG level if the syslog option isn't set. + +The log is like as: + + # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 + [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event + [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 + +Signed-off-by: Masayoshi Mizuma +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f08324f..98114a3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/timer.h" + #include "fuse_virtio.h" + #include "fuse_log.h" + #include "fuse_lowlevel.h" +@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + + if (current_log_level == FUSE_LOG_DEBUG) { +- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ if (!use_syslog) { ++ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", ++ get_clock(), syscall(__NR_gettid), fmt); ++ } else { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), ++ fmt); ++ } + fmt = localfmt; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch new file mode 100644 index 0000000..4713a0d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch @@ -0,0 +1,85 @@ +From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:53 +0100 +Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-79-dgilbert@redhat.com> +Patchwork-id: 93532 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Clear out our inodes and fd's on a 'destroy' - so we get rid +of them if we reboot the guest. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b176a31..9ed77a1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + ++static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) ++{ ++ struct lo_inode *inode = value; ++ struct lo_data *lo = user_data; ++ ++ inode->refcount = 0; ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ close(inode->fd); ++ ++ return TRUE; ++} ++ ++static void unref_all_inodes(struct lo_data *lo) ++{ ++ pthread_mutex_lock(&lo->mutex); ++ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, + } + } + ++static void lo_destroy(void *userdata) ++{ ++ struct lo_data *lo = (struct lo_data *)userdata; ++ unref_all_inodes(lo); ++} ++ + static struct fuse_lowlevel_ops lo_oper = { + .init = lo_init, + .lookup = lo_lookup, +@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { + .copy_file_range = lo_copy_file_range, + #endif + .lseek = lo_lseek, ++ .destroy = lo_destroy, + }; + + /* Print vhost-user.json backend program capabilities */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch new file mode 100644 index 0000000..c421365 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch @@ -0,0 +1,112 @@ +From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:23 +0100 +Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex + lock itself +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-109-dgilbert@redhat.com> +Patchwork-id: 93563 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +lo_destroy was relying on some implicit knowledge of the locking; +we can avoid this if we create an unref_inode that doesn't take +the lock and then grab it for the whole of the lo_destroy. + +Suggested-by: Vivek Goyal +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- + 1 file changed, 17 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index eb001b9..fc15d61 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + lo_inode_put(lo, &inode); + } + +-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +- uint64_t n) ++/* To be called with lo->mutex held */ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { + if (!inode) { + return; + } + +- pthread_mutex_lock(&lo->mutex); + assert(inode->nlookup >= n); + inode->nlookup -= n; + if (!inode->nlookup) { +@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); +- pthread_mutex_unlock(&lo->mutex); + + /* Drop our refcount from lo_do_lookup() */ + lo_inode_put(lo, &inode); +- } else { +- pthread_mutex_unlock(&lo->mutex); + } + } + ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) ++{ ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ unref_inode(lo, inode, n); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; + +- /* +- * Normally lo->mutex must be taken when traversing lo->inodes but +- * lo_destroy() is a serialized request so no races are possible here. 
+- * +- * In addition, we cannot acquire lo->mutex since unref_inode() takes it +- * too and this would result in a recursive lock. +- */ ++ pthread_mutex_lock(&lo->mutex); + while (true) { + GHashTableIter iter; + gpointer key, value; +@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) + } + + struct lo_inode *inode = value; +- unref_inode_lolocked(lo, inode, inode->nlookup); ++ unref_inode(lo, inode, inode->nlookup); + } ++ pthread_mutex_unlock(&lo->mutex); + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch new file mode 100644 index 0000000..9f198c2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch @@ -0,0 +1,176 @@ +From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:33 +0100 +Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-59-dgilbert@redhat.com> +Patchwork-id: 93513 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If client requested killing setuid/setgid bits on file being written, drop +CAP_FSETID capability so that setuid/setgid bits are cleared upon write +automatically. + +pjdfstest chown/12.t needs this. + +Signed-off-by: Vivek Goyal + dgilbert: reworked for libcap-ng +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 105 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 97e7c75..d53cb1e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -201,6 +201,91 @@ static int load_capng(void) + return 0; + } + ++/* ++ * Helpers for dropping and regaining effective capabilities. Returns 0 ++ * on success, error otherwise ++ */ ++static int drop_effective_cap(const char *cap_name, bool *cap_dropped) ++{ ++ int cap, ret; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ /* We dont have this capability in effective set already. 
*/ ++ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); ++ goto out; ++ } ++ ++ ret = 0; ++ if (cap_dropped) { ++ *cap_dropped = true; ++ } ++ ++out: ++ return ret; ++} ++ ++static int gain_effective_cap(const char *cap_name) ++{ ++ int cap; ++ int ret = 0; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ return ret; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + (void)ino; + ssize_t res; + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ bool cap_fsetid_dropped = false; + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + out_buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + ++ /* ++ * If kill_priv is set, drop CAP_FSETID which should lead to kernel ++ * clearing setuid/setgid on file. 
++ */ ++ if (fi->kill_priv) { ++ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); ++ if (res != 0) { ++ fuse_reply_err(req, res); ++ return; ++ } ++ } ++ + res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { + fuse_reply_write(req, (size_t)res); + } ++ ++ if (cap_fsetid_dropped) { ++ res = gain_effective_cap("FSETID"); ++ if (res) { ++ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); ++ } ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch new file mode 100644 index 0000000..03874ce --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch @@ -0,0 +1,240 @@ +From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:06 +0100 +Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-32-dgilbert@redhat.com> +Patchwork-id: 93480 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Readv the data straight into the guests buffer. + +Signed-off-by: Dr. David Alan Gilbert +With fix by: +Signed-off-by: Eryu Guan +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 5 ++ + tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 4 + + 3 files changed, 171 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 380d93b..4f4684d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + ++ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && ++ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { ++ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); ++ } ++ + abort(); /* Will have taken vhost path */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f1adeb6..7e2711b 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -230,6 +230,168 @@ err: + return ret; + } + ++/* ++ * Callback from fuse_send_data_iov_* when it's virtio and the buffer ++ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK ++ * We need send the iov and then the buffer. ++ * Return 0 on success ++ */ ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, struct fuse_bufvec *buf, ++ size_t len) ++{ ++ int ret = 0; ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! 
*/ ++ ++ size_t iov_len = iov_size(iov, count); ++ size_t tosend_len = iov_len + len; ++ ++ out->len = tosend_len; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, ++ count, len, iov_len); ++ ++ /* unique == 0 is notification which we don't support */ ++ assert(out->unique); ++ ++ /* For virtio we always have ch */ ++ assert(ch); ++ assert(!ch->qi->reply_sent); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ ret = E2BIG; ++ goto err; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ ret = E2BIG; ++ goto err; ++ } ++ ++ /* TODO: Limit to 'len' */ ++ ++ /* First copy the header data from iov->in_sg */ ++ copy_iov(iov, count, in_sg, in_num, iov_len); ++ ++ /* ++ * Build a copy of the the in_sg iov so we can skip bits in it, ++ * including changing the offsets ++ */ ++ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); ++ assert(in_sg_cpy); ++ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); ++ /* These get updated as we skip */ ++ struct iovec *in_sg_ptr = in_sg_cpy; ++ int in_sg_cpy_count = in_num; ++ ++ /* skip over parts of in_sg that contained the header iov */ ++ size_t skip_size = iov_len; ++ ++ size_t in_sg_left = 0; ++ do { ++ while (skip_size != 0 && in_sg_cpy_count) { ++ if (skip_size >= in_sg_ptr[0].iov_len) { ++ skip_size -= 
in_sg_ptr[0].iov_len; ++ in_sg_ptr++; ++ in_sg_cpy_count--; ++ } else { ++ in_sg_ptr[0].iov_len -= skip_size; ++ in_sg_ptr[0].iov_base += skip_size; ++ break; ++ } ++ } ++ ++ int i; ++ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { ++ in_sg_left += in_sg_ptr[i].iov_len; ++ } ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " ++ "in_sg_left=%zd\n", ++ __func__, skip_size, in_sg_cpy_count, in_sg_left); ++ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, ++ buf->buf[0].pos); ++ ++ if (ret == -1) { ++ ret = errno; ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", ++ __func__, len); ++ free(in_sg_cpy); ++ goto err; ++ } ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, ++ ret, len); ++ if (ret < len && ret) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); ++ /* Skip over this much next time around */ ++ skip_size = ret; ++ buf->buf[0].pos += ret; ++ len -= ret; ++ ++ /* Lets do another read */ ++ continue; ++ } ++ if (!ret) { ++ /* EOF case? 
*/ ++ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, ++ in_sg_left); ++ break; ++ } ++ if (ret != len) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); ++ ret = EIO; ++ free(in_sg_cpy); ++ goto err; ++ } ++ in_sg_left -= ret; ++ len -= ret; ++ } while (in_sg_left); ++ free(in_sg_cpy); ++ ++ /* Need to fix out->len on EOF */ ++ if (len) { ++ struct fuse_out_header *out_sg = in_sg[0].iov_base; ++ ++ tosend_len -= len; ++ out_sg->len = tosend_len; ++ } ++ ++ ret = 0; ++ ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++err: ++ if (ret == 0) { ++ ch->qi->reply_sent = true; ++ } ++ ++ return ret; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 135a148..cc676b9 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count); + ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, ++ struct fuse_bufvec *buf, size_t len); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch new file mode 100644 index 0000000..12bb9a2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch @@ -0,0 +1,164 @@ +From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:50 +0100 +Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU + builds +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-16-dgilbert@redhat.com> +Patchwork-id: 93470 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +All of the fuse files include config.h and define GNU_SOURCE +where we don't have either under our build - remove them. +Fixup path to the kernel's fuse.h in the QEMUs world. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 4 +--- + tools/virtiofsd/fuse_i.h | 3 +++ + tools/virtiofsd/fuse_log.c | 1 + + tools/virtiofsd/fuse_lowlevel.c | 6 ++---- + tools/virtiofsd/fuse_opt.c | 2 +- + tools/virtiofsd/fuse_signals.c | 2 +- + tools/virtiofsd/helper.c | 1 + + tools/virtiofsd/passthrough_ll.c | 8 ++------ + 8 files changed, 12 insertions(+), 15 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 4d507f3..772efa9 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -9,9 +9,7 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index e63cb58..bae0699 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,6 +6,9 @@ + * See the file COPYING.LIB + */ + ++#define FUSE_USE_VERSION 31 ++ ++ + #include "fuse.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 11345f9..c301ff6 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -8,6 +8,7 @@ + 
* See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_log.h" + + #include +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 3da80de..07fb8a6 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -9,11 +9,9 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" +-#include "fuse_kernel.h" ++#include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index edd36f4..2892236 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -9,8 +9,8 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_opt.h" +-#include "config.h" + #include "fuse_i.h" + #include "fuse_misc.h" + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 19d6791..dc7c8ac 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -8,7 +8,7 @@ + * See the file COPYING.LIB + */ + +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index d9227d7..9333691 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,6 +10,7 @@ + * See the file COPYING.LIB. 
+ */ + ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 126a56c..322a889 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -35,15 +35,11 @@ + * \include passthrough_ll.c + */ + +-#define _GNU_SOURCE +-#define FUSE_USE_VERSION 31 +- +-#include "config.h" +- ++#include "qemu/osdep.h" ++#include "fuse_lowlevel.h" + #include + #include + #include +-#include + #include + #include + #include +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch new file mode 100644 index 0000000..f929bab --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch @@ -0,0 +1,136 @@ +From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:15 +0100 +Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in + writeback mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-101-dgilbert@redhat.com> +Patchwork-id: 93556 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +When writeback mode is enabled (-o writeback), O_APPEND handling is +done in kernel. Therefore virtiofsd clears O_APPEND flag when open. +Otherwise O_APPEND flag takes precedence over pwrite() and write +data may corrupt. + +Currently clearing O_APPEND flag is done in lo_open(), but we also +need the same operation in lo_create(). 
So, factor out the flag +update operation in lo_open() to update_open_flags() and call it +in both lo_open() and lo_create(). + +This fixes the failure of xfstest generic/069 in writeback mode +(which tests O_APPEND write data integrity). + +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- + 1 file changed, 33 insertions(+), 33 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 948cb19..4c61ac5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + fuse_reply_err(req, 0); + } + ++static void update_open_flags(int writeback, struct fuse_file_info *fi) ++{ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. 
++ */ ++ fi->flags &= ~O_DIRECT; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. +- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- /* +- * With writeback cache, kernel may send read requests even +- * when userspace opened write-only +- */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* +- * With writeback cache, O_APPEND is handled by the kernel. +- * This breaks atomicity (since the file may change in the +- * underlying filesystem, so that the kernel's idea of the +- * end of the file isn't accurate anymore). In this example, +- * we just accept that. A more rigorous filesystem may want +- * to return an error here +- */ +- if (lo->writeback && (fi->flags & O_APPEND)) { +- fi->flags &= ~O_APPEND; +- } +- +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. 
+- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch new file mode 100644 index 0000000..306c183 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch @@ -0,0 +1,120 @@ +From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:49 +0100 +Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-15-dgilbert@redhat.com> +Patchwork-id: 93469 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +QEMU's compiler enables warnings/errors for ignored values +and the (void) trick used in the fuse code isn't enough. +Turn all the return values into a return value on the function. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5e6f205..d9227d7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,12 +10,10 @@ + * See the file COPYING.LIB. 
+ */ + +-#include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "mount_util.h" + + #include + #include +@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + + int fuse_daemonize(int foreground) + { ++ int ret = 0, rett; + if (!foreground) { + int nullfd; + int waiter[2]; +@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) + case 0: + break; + default: +- (void)read(waiter[0], &completed, sizeof(completed)); +- _exit(0); ++ _exit(read(waiter[0], &completed, ++ sizeof(completed) != sizeof(completed))); + } + + if (setsid() == -1) { +@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) + return -1; + } + +- (void)chdir("/"); ++ ret = chdir("/"); + + nullfd = open("/dev/null", O_RDWR, 0); + if (nullfd != -1) { +- (void)dup2(nullfd, 0); +- (void)dup2(nullfd, 1); +- (void)dup2(nullfd, 2); ++ rett = dup2(nullfd, 0); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 1); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 2); ++ if (!ret) { ++ ret = rett; ++ } + if (nullfd > 2) { + close(nullfd); + } +@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) + + /* Propagate completion of daemon initialization */ + completed = 1; +- (void)write(waiter[1], &completed, sizeof(completed)); ++ rett = write(waiter[1], &completed, sizeof(completed)); ++ if (!ret) { ++ ret = rett; ++ } + close(waiter[0]); + close(waiter[1]); + } else { +- (void)chdir("/"); ++ ret = chdir("/"); + } +- return 0; ++ return ret; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch b/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch new file mode 100644 index 0000000..532948f --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch @@ -0,0 +1,327 @@ +From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:14 +0000 +Subject: [PATCH 18/18] virtiofsd: Fix xattr operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-8-dgilbert@redhat.com> +Patchwork-id: 94123 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +Current virtiofsd has problems with xattr operations and +they do not work properly for directory/symlink/special file. + +The fundamental cause is that virtiofsd uses openat() + f...xattr() +system calls for xattr operations but we should not open symlink/special +file in the daemon. Therefore the function is restricted. + +Fix this problem by: + 1. during setup of each thread, call unshare(CLONE_FS) + 2. in xattr operations (e.g. lo_getxattr), if inode is not a regular + file or directory, use fchdir(proc_self_fd) + ...xattr() + + fchdir(root.fd) instead of openat() + f...xattr() + + (Note: for a regular file/directory openat() + f...xattr() + is still used for performance reasons) + +With this patch, xfstests generic/062 passes on virtiofs. + +This fix was suggested by Miklos Szeredi and Stefan Hajnoczi. +The original discussion can be found here: + https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) +Signed-off-by: Danilo C. L.
de Paula +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++ + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ + tools/virtiofsd/seccomp.c | 6 +++ + 3 files changed, 77 insertions(+), 47 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index dd1c605..3b6d16a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -426,6 +426,8 @@ err: + return ret; + } + ++static __thread bool clone_fs_called; ++ + /* Process one FVRequest in a thread pool */ + static void fv_queue_worker(gpointer data, gpointer user_data) + { +@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + + assert(se->bufsize > sizeof(struct fuse_in_header)); + ++ if (!clone_fs_called) { ++ int ret; ++ ++ /* unshare FS for xattr operation */ ++ ret = unshare(CLONE_FS); ++ /* should not fail */ ++ assert(ret == 0); ++ ++ clone_fs_called = true; ++ } ++ + /* + * An element contains one request and the space to send our response + * They're spread over multiple descriptors in a scatter/gather set +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 50c7273..9cba3f1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -123,7 +123,7 @@ struct lo_inode { + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + +- bool is_symlink; ++ mode_t filetype; + }; + + struct lo_cred { +@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. 
*/ +@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ /* cache only filetype */ ++ inode->filetype = (e->attr.st_mode & S_IFMT); + + /* + * One for the caller and one for nlookup (released in +@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. */ +@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", + ino, name, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ /* ++ * It is not safe to open() non-regular/non-dir files in file server ++ * unless O_PATH is used, so use that method for regular files/dir ++ * only (as it seems giving less performance overhead). ++ * Otherwise, call fchdir() to avoid open(). 
++ */ ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = fgetxattr(fd, name, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = getxattr(procname, name, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, + size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = flistxattr(fd, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = listxattr(procname, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 + ", name=%s value=%s size=%zd)\n", ino, name, value, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fsetxattr(fd, name, value, size, flags); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = setxattr(procname, name, value, size, flags); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + + out: +@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, + name); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fremovexattr(fd, name); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = removexattr(procname, name); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fremovexattr(fd, name); + saverr = ret == -1 ? 
errno : 0; + + out: +@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + exit(1); + } + +- root->is_symlink = false; ++ root->filetype = S_IFDIR; + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 2d9d4a7..bd9e7b0 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(exit), + SCMP_SYS(exit_group), + SCMP_SYS(fallocate), ++ SCMP_SYS(fchdir), + SCMP_SYS(fchmodat), + SCMP_SYS(fchownat), + SCMP_SYS(fcntl), +@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { + SCMP_SYS(getpid), + SCMP_SYS(gettid), + SCMP_SYS(gettimeofday), ++ SCMP_SYS(getxattr), + SCMP_SYS(linkat), ++ SCMP_SYS(listxattr), + SCMP_SYS(lseek), + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), +@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(recvmsg), + SCMP_SYS(renameat), + SCMP_SYS(renameat2), ++ SCMP_SYS(removexattr), + SCMP_SYS(rt_sigaction), + SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), +@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { + SCMP_SYS(setresuid32), + #endif + SCMP_SYS(set_robust_list), ++ SCMP_SYS(setxattr), + SCMP_SYS(symlinkat), + SCMP_SYS(time), /* Rarely needed, except on static builds */ + SCMP_SYS(tgkill), + SCMP_SYS(unlinkat), ++ SCMP_SYS(unshare), + SCMP_SYS(utimensat), + SCMP_SYS(write), + SCMP_SYS(writev), +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch new file mode 100644 index 0000000..5593a33 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch @@ -0,0 +1,14743 @@ +From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:45 +0100 +Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-11-dgilbert@redhat.com> +Patchwork-id: 93464 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Mostly using a set like: + +indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file +clang-format -style=file -i -- file +clang-tidy -fix-errors -checks=readability-braces-around-statements file +clang-format -style=file -i -- file + +With manual cleanups. + +The .clang-format used is below. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed by: Aleksandar Markovic + +Language: Cpp +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false # although we like it, it creates churn +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: false # churn +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterStruct: false + AfterUnion: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakStringLiterals: true 
+ColumnLimit: 80 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ForEachMacros: [ + 'CPU_FOREACH', + 'CPU_FOREACH_REVERSE', + 'CPU_FOREACH_SAFE', + 'IOMMU_NOTIFIER_FOREACH', + 'QLIST_FOREACH', + 'QLIST_FOREACH_ENTRY', + 'QLIST_FOREACH_RCU', + 'QLIST_FOREACH_SAFE', + 'QLIST_FOREACH_SAFE_RCU', + 'QSIMPLEQ_FOREACH', + 'QSIMPLEQ_FOREACH_SAFE', + 'QSLIST_FOREACH', + 'QSLIST_FOREACH_SAFE', + 'QTAILQ_FOREACH', + 'QTAILQ_FOREACH_REVERSE', + 'QTAILQ_FOREACH_SAFE', + 'QTAILQ_RAW_FOREACH', + 'RAMBLOCK_FOREACH' +] +IncludeCategories: + - Regex: '^"qemu/osdep.h' + Priority: -3 + - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' + Priority: -2 + - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' + Priority: -1 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '$' +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? +MacroBlockEnd: '.*_END$' +MaxEmptyLinesToKeep: 2 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInContainerLiterals: true +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +UseTab: Never +... + +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 434 ++-- + tools/virtiofsd/fuse.h | 1572 +++++++------- + tools/virtiofsd/fuse_common.h | 730 +++---- + tools/virtiofsd/fuse_i.h | 121 +- + tools/virtiofsd/fuse_log.c | 38 +- + tools/virtiofsd/fuse_log.h | 32 +- + tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- + tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- + tools/virtiofsd/fuse_misc.h | 30 +- + tools/virtiofsd/fuse_opt.c | 659 +++--- + tools/virtiofsd/fuse_opt.h | 79 +- + tools/virtiofsd/fuse_signals.c | 118 +- + tools/virtiofsd/helper.c | 506 ++--- + tools/virtiofsd/passthrough_helpers.h | 33 +- + tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- + 15 files changed, 6382 insertions(+), 6061 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index aefb7db..5df946c 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -1,252 +1,272 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2010 Miklos Szeredi +- +- Functions for dealing with `struct fuse_buf` and `struct +- fuse_bufvec`. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2010 Miklos Szeredi ++ * ++ * Functions for dealing with `struct fuse_buf` and `struct ++ * fuse_bufvec`. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" ++#include ++#include + #include + #include +-#include +-#include + + size_t fuse_buf_size(const struct fuse_bufvec *bufv) + { +- size_t i; +- size_t size = 0; +- +- for (i = 0; i < bufv->count; i++) { +- if (bufv->buf[i].size == SIZE_MAX) +- size = SIZE_MAX; +- else +- size += bufv->buf[i].size; +- } +- +- return size; ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) { ++ size = SIZE_MAX; ++ } else { ++ size += bufv->buf[i].size; ++ } ++ } ++ ++ return size; + } + + static size_t min_size(size_t s1, size_t s2) + { +- return s1 < s2 ? s1 : s2; ++ return s1 < s2 ? s1 : s2; + } + + static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- res = pwrite(dst->fd, (char *)src->mem + src_off, len, +- dst->pos + dst_off); +- } else { +- res = write(dst->fd, (char *)src->mem + src_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(dst->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- src_off += res; +- dst_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ src_off += res; ++ dst_off += res; ++ len -= 
res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (src->flags & FUSE_BUF_FD_SEEK) { +- res = pread(src->fd, (char *)dst->mem + dst_off, len, +- src->pos + src_off); +- } else { +- res = read(src->fd, (char *)dst->mem + dst_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- char buf[4096]; +- struct fuse_buf tmp = { +- .size = sizeof(buf), +- .flags = 0, +- }; +- ssize_t res; +- size_t copied = 0; +- +- tmp.mem = buf; +- +- while (len) { +- size_t this_len = min_size(tmp.size, len); +- size_t read_len; +- +- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- if (res == 0) +- break; +- +- read_len = res; +- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); +- if (res < 0) { +- if (!copied) +- 
return res; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- +- if (res < this_len) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ ++ if (res < this_len) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) + { +- int src_is_fd = src->flags & FUSE_BUF_IS_FD; +- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +- +- if (!src_is_fd && !dst_is_fd) { +- char *dstmem = (char *)dst->mem + dst_off; +- char *srcmem = (char *)src->mem + src_off; +- +- if (dstmem != srcmem) { +- if (dstmem + len <= srcmem || srcmem + len <= dstmem) +- memcpy(dstmem, srcmem, len); +- else +- memmove(dstmem, srcmem, len); +- } +- +- return len; +- } else if (!src_is_fd) { +- return fuse_buf_write(dst, dst_off, src, src_off, len); +- } else if (!dst_is_fd) { +- return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +- } ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ 
char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { ++ memcpy(dstmem, srcmem, len); ++ } else { ++ memmove(dstmem, srcmem, len); ++ } ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } + } + + static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) + { +- if (bufv->idx < bufv->count) +- return &bufv->buf[bufv->idx]; +- else +- return NULL; ++ if (bufv->idx < bufv->count) { ++ return &bufv->buf[bufv->idx]; ++ } else { ++ return NULL; ++ } + } + + static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + { +- const struct fuse_buf *buf = fuse_bufvec_current(bufv); +- +- bufv->off += len; +- assert(bufv->off <= buf->size); +- if (bufv->off == buf->size) { +- assert(bufv->idx < bufv->count); +- bufv->idx++; +- if (bufv->idx == bufv->count) +- return 0; +- bufv->off = 0; +- } +- return 1; ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) { ++ return 0; ++ } ++ bufv->off = 0; ++ } ++ return 1; + } + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- size_t copied = 0; +- +- if (dstv == srcv) +- return fuse_buf_size(dstv); +- +- for (;;) { +- const struct fuse_buf *src = fuse_bufvec_current(srcv); +- const struct fuse_buf *dst = fuse_bufvec_current(dstv); +- size_t src_len; +- size_t dst_len; +- size_t len; +- ssize_t res; +- +- if (src == NULL || dst == NULL) +- break; +- +- src_len = src->size - srcv->off; +- 
dst_len = dst->size - dstv->off; +- len = min_size(src_len, dst_len); +- +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- copied += res; +- +- if (!fuse_bufvec_advance(srcv, res) || +- !fuse_bufvec_advance(dstv, res)) +- break; +- +- if (res < len) +- break; +- } +- +- return copied; ++ size_t copied = 0; ++ ++ if (dstv == srcv) { ++ return fuse_buf_size(dstv); ++ } ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) { ++ break; ++ } ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) || ++ !fuse_bufvec_advance(dstv, res)) { ++ break; ++ } ++ ++ if (res < len) { ++ break; ++ } ++ } ++ ++ return copied; + } +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 3202fba..7a4c713 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1,15 +1,15 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_H_ + #define FUSE_H_ + +-/** @file ++/* + * + * This file defines the library interface of FUSE + * +@@ -19,15 +19,15 @@ + #include "fuse_common.h" + + #include +-#include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Basic FUSE API * +- * ----------------------------------------------------------- */ ++/* ++ * Basic FUSE API ++ */ + + /** Handle for a FUSE filesystem */ + struct fuse; +@@ -36,38 +36,39 @@ struct fuse; + * Readdir flags, passed to ->readdir() + */ + enum fuse_readdir_flags { +- /** +- * "Plus" mode. +- * +- * The kernel wants to prefill the inode cache during readdir. The +- * filesystem may honour this by filling in the attributes and setting +- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also +- * just ignore this flag completely. +- */ +- FUSE_READDIR_PLUS = (1 << 0), ++ /** ++ * "Plus" mode. ++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), + }; + + enum fuse_fill_dir_flags { +- /** +- * "Plus" mode: all file attributes are valid +- * +- * The attributes are used by the kernel to prefill the inode cache +- * during a readdir. +- * +- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set +- * and vice versa. +- */ +- FUSE_FILL_DIR_PLUS = (1 << 1), ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. 
++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), + }; + +-/** Function to add an entry in a readdir() operation ++/** ++ * Function to add an entry in a readdir() operation + * + * The *off* parameter can be any non-zero value that enables the + * filesystem to identify the current point in the directory + * stream. It does not need to be the actual physical position. A + * value of zero is reserved to indicate that seeking in directories + * is not supported. +- * ++ * + * @param buf the buffer passed to the readdir() operation + * @param name the file name of the directory entry + * @param stat file attributes, can be NULL +@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { + * @param flags fill flags + * @return 1 if buffer is full, zero otherwise + */ +-typedef int (*fuse_fill_dir_t) (void *buf, const char *name, +- const struct stat *stbuf, off_t off, +- enum fuse_fill_dir_flags flags); ++typedef int (*fuse_fill_dir_t)(void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); + /** + * Configuration of the high-level API + * +@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, + * file system implementation. + */ + struct fuse_config { +- /** +- * If `set_gid` is non-zero, the st_gid attribute of each file +- * is overwritten with the value of `gid`. +- */ +- int set_gid; +- unsigned int gid; +- +- /** +- * If `set_uid` is non-zero, the st_uid attribute of each file +- * is overwritten with the value of `uid`. +- */ +- int set_uid; +- unsigned int uid; +- +- /** +- * If `set_mode` is non-zero, the any permissions bits set in +- * `umask` are unset in the st_mode attribute of each file. +- */ +- int set_mode; +- unsigned int umask; +- +- /** +- * The timeout in seconds for which name lookups will be +- * cached. +- */ +- double entry_timeout; +- +- /** +- * The timeout in seconds for which a negative lookup will be +- * cached. 
This means, that if file did not exist (lookup +- * retuned ENOENT), the lookup will only be redone after the +- * timeout, and the file/directory will be assumed to not +- * exist until then. A value of zero means that negative +- * lookups are not cached. +- */ +- double negative_timeout; +- +- /** +- * The timeout in seconds for which file/directory attributes +- * (as returned by e.g. the `getattr` handler) are cached. +- */ +- double attr_timeout; +- +- /** +- * Allow requests to be interrupted +- */ +- int intr; +- +- /** +- * Specify which signal number to send to the filesystem when +- * a request is interrupted. The default is hardcoded to +- * USR1. +- */ +- int intr_signal; +- +- /** +- * Normally, FUSE assigns inodes to paths only for as long as +- * the kernel is aware of them. With this option inodes are +- * instead remembered for at least this many seconds. This +- * will require more memory, but may be necessary when using +- * applications that make use of inode numbers. +- * +- * A number of -1 means that inodes will be remembered for the +- * entire life-time of the file-system process. +- */ +- int remember; +- +- /** +- * The default behavior is that if an open file is deleted, +- * the file is renamed to a hidden file (.fuse_hiddenXXX), and +- * only removed when the file is finally released. This +- * relieves the filesystem implementation of having to deal +- * with this problem. This option disables the hiding +- * behavior, and files are removed immediately in an unlink +- * operation (or in a rename operation which overwrites an +- * existing file). +- * +- * It is recommended that you not use the hard_remove +- * option. 
When hard_remove is set, the following libc +- * functions fail on unlinked files (returning errno of +- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), +- * ftruncate(2), fstat(2), fchmod(2), fchown(2) +- */ +- int hard_remove; +- +- /** +- * Honor the st_ino field in the functions getattr() and +- * fill_dir(). This value is used to fill in the st_ino field +- * in the stat(2), lstat(2), fstat(2) functions and the d_ino +- * field in the readdir(2) function. The filesystem does not +- * have to guarantee uniqueness, however some applications +- * rely on this value being unique for the whole filesystem. +- * +- * Note that this does *not* affect the inode that libfuse +- * and the kernel use internally (also called the "nodeid"). +- */ +- int use_ino; +- +- /** +- * If use_ino option is not given, still try to fill in the +- * d_ino field in readdir(2). If the name was previously +- * looked up, and is still in the cache, the inode number +- * found there will be used. Otherwise it will be set to -1. +- * If use_ino option is given, this option is ignored. +- */ +- int readdir_ino; +- +- /** +- * This option disables the use of page cache (file content cache) +- * in the kernel for this filesystem. This has several affects: +- * +- * 1. Each read(2) or write(2) system call will initiate one +- * or more read or write operations, data will not be +- * cached in the kernel. +- * +- * 2. The return value of the read() and write() system calls +- * will correspond to the return values of the read and +- * write operations. This is useful for example if the +- * file size is not known in advance (before reading it). +- * +- * Internally, enabling this option causes fuse to set the +- * `direct_io` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int direct_io; +- +- /** +- * This option disables flushing the cache of the file +- * contents on every open(2). 
This should only be enabled on +- * filesystems where the file data is never changed +- * externally (not through the mounted FUSE filesystem). Thus +- * it is not suitable for network filesystems and other +- * intermediate filesystems. +- * +- * NOTE: if this option is not specified (and neither +- * direct_io) data is still cached after the open(2), so a +- * read(2) system call will not always initiate a read +- * operation. +- * +- * Internally, enabling this option causes fuse to set the +- * `keep_cache` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int kernel_cache; +- +- /** +- * This option is an alternative to `kernel_cache`. Instead of +- * unconditionally keeping cached data, the cached data is +- * invalidated on open(2) if if the modification time or the +- * size of the file has changed since it was last opened. +- */ +- int auto_cache; +- +- /** +- * The timeout in seconds for which file attributes are cached +- * for the purpose of checking if auto_cache should flush the +- * file data on open. +- */ +- int ac_attr_timeout_set; +- double ac_attr_timeout; +- +- /** +- * If this option is given the file-system handlers for the +- * following operations will not receive path information: +- * read, write, flush, release, fsync, readdir, releasedir, +- * fsyncdir, lock, ioctl and poll. +- * +- * For the truncate, getattr, chmod, chown and utimens +- * operations the path will be provided only if the struct +- * fuse_file_info argument is NULL. +- */ +- int nullpath_ok; +- +- /** +- * The remaining options are used by libfuse internally and +- * should not be touched. +- */ +- int show_help; +- char *modules; +- int debug; ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, the any permissions bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means, that if file did not exist (lookup ++ * retuned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released. 
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several affects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it). 
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched. 
++ */ ++ int show_help; ++ char *modules; ++ int debug; + }; + + +@@ -293,515 +294,535 @@ struct fuse_config { + * Almost all operations take a path which can be of any length. + */ + struct fuse_operations { +- /** Get file attributes. +- * +- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are +- * ignored. The 'st_ino' field is ignored except if the 'use_ino' +- * mount option is given. In that case it is passed to userspace, +- * but libfuse and the kernel will still assign a different +- * inode for internal use (called the "nodeid"). +- * +- * `fi` will always be NULL if the file is not currently open, but +- * may also be NULL if the file is open. +- */ +- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); +- +- /** Read the target of a symbolic link +- * +- * The buffer should be filled with a null terminated string. The +- * buffer size argument includes the space for the terminating +- * null character. If the linkname is too long to fit in the +- * buffer, it should be truncated. The return value should be 0 +- * for success. +- */ +- int (*readlink) (const char *, char *, size_t); +- +- /** Create a file node +- * +- * This is called for creation of all non-directory, non-symlink +- * nodes. If the filesystem defines a create() method, then for +- * regular files that will be called instead. +- */ +- int (*mknod) (const char *, mode_t, dev_t); +- +- /** Create a directory +- * +- * Note that the mode argument may not have the type specification +- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the +- * correct directory type bits use mode|S_IFDIR +- * */ +- int (*mkdir) (const char *, mode_t); +- +- /** Remove a file */ +- int (*unlink) (const char *); +- +- /** Remove a directory */ +- int (*rmdir) (const char *); +- +- /** Create a symbolic link */ +- int (*symlink) (const char *, const char *); +- +- /** Rename a file +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. 
If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. +- */ +- int (*rename) (const char *, const char *, unsigned int flags); +- +- /** Create a hard link to a file */ +- int (*link) (const char *, const char *); +- +- /** Change the permission bits of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- */ +- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); +- +- /** Change the owner and group of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); +- +- /** Change the size of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); +- +- /** Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) +- * should be used by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount option is +- * given, this check is already done by the kernel before calling +- * open() and may thus be omitted by the filesystem. 
+- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open will also succeed without being send +- * to the filesystem process. +- * +- */ +- int (*open) (const char *, struct fuse_file_info *); +- +- /** Read data from an open file +- * +- * Read should return exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the +- * 'direct_io' mount option is specified, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. 
+- */ +- int (*read) (const char *, char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Write data to an open file +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the 'direct_io' +- * mount option is specified (see read operation). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write) (const char *, const char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Get file system statistics +- * +- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored +- */ +- int (*statfs) (const char *, struct statvfs *); +- +- /** Possibly flush cached data +- * +- * BIG NOTE: This is not equivalent to fsync(). It's not a +- * request to sync dirty data. +- * +- * Flush is called on each close() of a file descriptor, as opposed to +- * release which is called on the close of the last file descriptor for +- * a file. Under Linux, errors returned by flush() will be passed to +- * userspace as errors from close(), so flush() is a good place to write +- * back any cached dirty data. However, many applications ignore errors +- * on close(), and on non-Linux systems, close() may succeed even if flush() +- * returns an error. For these reasons, filesystems should not assume +- * that errors returned by flush will ever be noticed or even +- * delivered. +- * +- * NOTE: The flush() method may be called more than once for each +- * open(). This happens if more than one file descriptor refers to an +- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is +- * not possible to determine if a flush is final, so each flush should +- * be treated equally. Multiple write-flush sequences are relatively +- * rare, so this shouldn't be a problem. +- * +- * Filesystems shouldn't assume that flush will be called at any +- * particular point. It may be called more times than expected, or not +- * at all. 
+- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- int (*flush) (const char *, struct fuse_file_info *); +- +- /** Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open() call there will be exactly one release() call +- * with the same flags and file handle. It is possible to +- * have a file opened more than once, in which case only the last +- * release will mean, that no more reads/writes will happen on the +- * file. The return value of release is ignored. +- */ +- int (*release) (const char *, struct fuse_file_info *); +- +- /** Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. +- */ +- int (*fsync) (const char *, int, struct fuse_file_info *); +- +- /** Set extended attributes */ +- int (*setxattr) (const char *, const char *, const char *, size_t, int); +- +- /** Get extended attributes */ +- int (*getxattr) (const char *, const char *, char *, size_t); +- +- /** List extended attributes */ +- int (*listxattr) (const char *, char *, size_t); +- +- /** Remove extended attributes */ +- int (*removexattr) (const char *, const char *); +- +- /** Open directory +- * +- * Unless the 'default_permissions' mount option is given, +- * this method should check if opendir is permitted for this +- * directory. Optionally opendir may also return an arbitrary +- * filehandle in the fuse_file_info structure, which will be +- * passed to readdir, releasedir and fsyncdir. +- */ +- int (*opendir) (const char *, struct fuse_file_info *); +- +- /** Read directory +- * +- * The filesystem may choose between two modes of operation: +- * +- * 1) The readdir implementation ignores the offset parameter, and +- * passes zero to the filler function's offset. 
The filler +- * function will not return '1' (unless an error happens), so the +- * whole directory is read in a single readdir operation. +- * +- * 2) The readdir implementation keeps track of the offsets of the +- * directory entries. It uses the offset parameter and always +- * passes non-zero offset to the filler function. When the buffer +- * is full (or an error happens) the filler function will return +- * '1'. +- */ +- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, +- struct fuse_file_info *, enum fuse_readdir_flags); +- +- /** Release directory +- */ +- int (*releasedir) (const char *, struct fuse_file_info *); +- +- /** Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data +- */ +- int (*fsyncdir) (const char *, int, struct fuse_file_info *); +- +- /** +- * Initialize filesystem +- * +- * The return value will passed in the `private_data` field of +- * `struct fuse_context` to all file operations, and as a +- * parameter to the destroy() method. It overrides the initial +- * value provided to fuse_main() / fuse_new(). +- */ +- void *(*init) (struct fuse_conn_info *conn, +- struct fuse_config *cfg); +- +- /** +- * Clean up filesystem +- * +- * Called on filesystem exit. +- */ +- void (*destroy) (void *private_data); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() system call. If the +- * 'default_permissions' mount option is given, this method is not +- * called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- */ +- int (*access) (const char *, int); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. 
+- */ +- int (*create) (const char *, mode_t, struct fuse_file_info *); +- +- /** +- * Perform POSIX file locking operation +- * +- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. +- * +- * For the meaning of fields in 'struct flock' see the man page +- * for fcntl(2). The l_whence field will always be set to +- * SEEK_SET. +- * +- * For checking lock ownership, the 'fuse_file_info->owner' +- * argument must be used. +- * +- * For F_GETLK operation, the library will first check currently +- * held locks, and if a conflicting lock is found it will return +- * information without calling this method. This ensures, that +- * for local locks the l_pid field is correctly filled in. The +- * results may not be accurate in case of race conditions and in +- * the presence of hard links, but it's unlikely that an +- * application would rely on accurate GETLK results in these +- * cases. If a conflicting lock is not found, this method will be +- * called, and the filesystem may fill out l_pid by a meaningful +- * value, or it may leave this field zero. +- * +- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid +- * of the process performing the locking operation. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*lock) (const char *, struct fuse_file_info *, int cmd, +- struct flock *); +- +- /** +- * Change the access and modification times of a file with +- * nanosecond resolution +- * +- * This supersedes the old utime() interface. New applications +- * should use this. +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * See the utimensat(2) man page for details. 
+- */ +- int (*utimens) (const char *, const struct timespec tv[2], +- struct fuse_file_info *fi); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- */ +- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); +- +- /** +- * Ioctl +- * +- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in +- * 64bit environment. The size and direction of data is +- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, +- * data will be NULL, for _IOC_WRITE data is out area, for +- * _IOC_READ in area and if both are set in/out area. In all +- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. +- * +- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a +- * directory file handle. +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- int (*ioctl) (const char *, unsigned int cmd, void *arg, +- struct fuse_file_info *, unsigned int flags, void *data); +- +- /** +- * Poll for IO readiness events +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. +- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- */ +- int (*poll) (const char *, struct fuse_file_info *, +- struct fuse_pollhandle *ph, unsigned *reventsp); +- +- /** Write contents of buffer to an open file +- * +- * Similar to the write() method, but data is supplied in a +- * generic buffer. Use fuse_buf_copy() to transfer data to +- * the destination. 
+- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *); +- +- /** Store data from an open file in a buffer +- * +- * Similar to the read() method, but data is stored and +- * returned in a generic buffer. +- * +- * No actual copying of data has to take place, the source +- * file descriptor may simply be stored in the buffer for +- * later data transfer. +- * +- * The buffer must be allocated dynamically and stored at the +- * location pointed to by bufp. If the buffer contains memory +- * regions, they too must be allocated using malloc(). The +- * allocated memory will be freed by the caller. +- */ +- int (*read_buf) (const char *, struct fuse_bufvec **bufp, +- size_t size, off_t off, struct fuse_file_info *); +- /** +- * Perform BSD file locking operation +- * +- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN +- * +- * Nonblocking requests will be indicated by ORing LOCK_NB to +- * the above operations +- * +- * For more information see the flock(2) manual page. +- * +- * Additionally fi->owner will be set to a value unique to +- * this open file. This same value will be supplied to +- * ->release() when the file is released. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*flock) (const char *, struct fuse_file_info *, int op); +- +- /** +- * Allocates space for an open file +- * +- * This function ensures that required space is allocated for specified +- * file. If this function returns success then any subsequent write +- * request to specified range is guaranteed not to fail because of lack +- * of space on the file system media. 
+- */ +- int (*fallocate) (const char *, int, off_t, off_t, +- struct fuse_file_info *); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. +- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- */ +- ssize_t (*copy_file_range) (const char *path_in, +- struct fuse_file_info *fi_in, +- off_t offset_in, const char *path_out, +- struct fuse_file_info *fi_out, +- off_t offset_out, size_t size, int flags); +- +- /** +- * Find next data or hole after the specified offset +- */ +- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++ /** ++ * Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** ++ * Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. ++ */ ++ int (*readlink)(const char *, char *, size_t); ++ ++ /** ++ * Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. 
If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod)(const char *, mode_t, dev_t); ++ ++ /** ++ * Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ */ ++ int (*mkdir)(const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink)(const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir)(const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink)(const char *, const char *); ++ ++ /** ++ * Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename)(const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link)(const char *, const char *); ++ ++ /** ++ * Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. 
++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in other all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in for more details. 
++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being send ++ * to the filesystem process. ++ * ++ */ ++ int (*open)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write)(const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs)(const char *, struct statvfs *); ++ ++ /** ++ * Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file. Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. 
However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync)(const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr)(const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr)(const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr)(const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr)(const char *, const char *); ++ ++ /* ++ * Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** ++ * Release directory ++ */ ++ int (*releasedir)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir)(const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy)(void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access)(const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create)(const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used. 
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures, that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens)(const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl)(const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll)(const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /* ++ * Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /* ++ * Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, ++ off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock)(const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range)(const char *path_in, ++ struct fuse_file_info *fi_in, off_t offset_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t offset_out, ++ size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek)(const char *, off_t off, int whence, ++ struct fuse_file_info *); + }; + +-/** Extra context that may be needed by some filesystems ++/* ++ * Extra context that may be needed by some filesystems + * + * The uid, gid and pid fields are not filled in case of a writepage + * operation. + */ + struct fuse_context { +- /** Pointer to the fuse object */ +- struct fuse *fuse; ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; + +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Process ID of the calling thread */ +- pid_t pid; ++ /** Process ID of the calling thread */ ++ pid_t pid; + +- /** Private filesystem data */ +- void *private_data; ++ /** Private filesystem data */ ++ void *private_data; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + /** +@@ -859,15 +880,15 @@ struct fuse_context { + * Example usage, see hello.c + */ + /* +- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, +- void *private_data); +-*/ +-#define fuse_main(argc, argv, op, private_data) \ +- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ * void *private_data); ++ */ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) + +-/* ----------------------------------------------------------- * +- * More detailed API * +- * 
----------------------------------------------------------- */ ++/* ++ * More detailed API ++ */ + + /** + * Print available options (high- and low-level) to stdout. This is +@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); + * @return the created FUSE handle + */ + #if FUSE_USE_VERSION == 30 +-struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++struct fuse *fuse_new_30(struct fuse_args *args, ++ const struct fuse_operations *op, size_t op_size, ++ void *private_data); + #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) + #else + struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + #endif + + /** +@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); + /** + * Destroy the FUSE handle. + * +- * NOTE: This function does not unmount the filesystem. If this is ++ * NOTE: This function does not unmount the filesystem. If this is + * needed, call fuse_unmount() before calling this function. + * + * @param f the FUSE handle +@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); + * Do not call this directly, use fuse_main() + */ + int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + + /** + * Start the cleanup thread when using option "remember". 
+@@ -1081,89 +1103,87 @@ struct fuse_fs; + */ + + int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, +- struct fuse_file_info *fi); +-int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, +- const char *newpath, unsigned int flags); ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, ++ unsigned int flags); + int fuse_fs_unlink(struct fuse_fs *fs, const char *path); + int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); +-int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, +- const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); + int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); +-int fuse_fs_release(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); + int fuse_fs_open(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, +- off_t off, struct fuse_file_info *fi); ++ off_t off, struct fuse_file_info *fi); + int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec **bufp, size_t size, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); ++ size_t size, off_t off, struct fuse_file_info *fi); + int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); + int fuse_fs_opendir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, +- fuse_fill_dir_t filler, off_t off, +- struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); + int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_lock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int cmd, struct flock *lock); ++ struct fuse_file_info *fi, int cmd, struct flock *lock); + int fuse_fs_flock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int op); ++ struct fuse_file_info *fi, int op); + int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_utimens(struct fuse_fs *fs, const char *path, +- const struct timespec tv[2], struct fuse_file_info *fi); ++ const struct timespec tv[2], struct fuse_file_info *fi); + int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); + int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, +- size_t len); ++ size_t len); + int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, +- dev_t rdev); ++ dev_t rdev); + int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); + int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, +- const char *value, size_t size, int flags); ++ const char *value, size_t size, int flags); + int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, +- char *value, size_t size); ++ char *value, size_t size); + int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, +- size_t size); +-int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, +- const char *name); ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); + int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, +- uint64_t *idx); ++ uint64_t *idx); + int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned int flags, +- void *data); ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); + int fuse_fs_poll(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, struct fuse_pollhandle *ph, +- unsigned *reventsp); ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); + int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); ++ off_t offset, off_t length, struct fuse_file_info *fi); + ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, +- struct fuse_file_info *fi_in, off_t off_in, +- const char *path_out, +- struct fuse_file_info *fi_out, off_t off_out, +- size_t len, int flags); ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); + off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, +- struct fuse_config *cfg); ++ struct fuse_config *cfg); + void fuse_fs_destroy(struct fuse_fs *fs); + + int fuse_notify_poll(struct fuse_pollhandle *ph); +@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); + * @return a new filesystem object + */ + struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, +- void *private_data); ++ void *private_data); + + /** + * Factory for creating filesystem objects +@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, + * @return the new filesystem object + */ + typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, +- struct fuse_fs *fs[]); ++ struct fuse_fs *fs[]); + /** + * Register filesystem module + * +@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, + * @param factory_ the factory function for this filesystem module + */ + #define FUSE_REGISTER_MODULE(name_, factory_) \ +- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ fuse_module_factory_t fuse_module_##name_##_factory = factory_ + + /** Get session from fuse object */ + struct fuse_session *fuse_get_session(struct fuse *f); +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bf8f8cc..bd9bf86 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -1,21 +1,23 @@ +-/* FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++/* ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + /** @file */ + + #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) +-#error "Never include directly; use or instead." ++#error \ ++ "Never include directly; use or instead." + #endif + + #ifndef FUSE_COMMON_H_ + #define FUSE_COMMON_H_ + +-#include "fuse_opt.h" + #include "fuse_log.h" ++#include "fuse_opt.h" + #include + #include + +@@ -25,7 +27,7 @@ + /** Minor version of FUSE library interface */ + #define FUSE_MINOR_VERSION 2 + +-#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + + /** +@@ -38,67 +40,83 @@ + * descriptors can share a single file handle. + */ + struct fuse_file_info { +- /** Open flags. Available in open() and release() */ +- int flags; +- +- /** In case of a write operation indicates if this was caused +- by a delayed write from the page cache. If so, then the +- context's pid, uid, and gid fields will not be valid, and +- the *fh* value may not match the *fh* value that would +- have been sent with the corresponding individual write +- requests if write caching had been disabled. */ +- unsigned int writepage : 1; +- +- /** Can be filled in by open, to use direct I/O on this file. */ +- unsigned int direct_io : 1; +- +- /** Can be filled in by open. It signals the kernel that any +- currently cached file data (ie., data that the filesystem +- provided the last time the file was open) need not be +- invalidated. Has no effect when set in other contexts (in +- particular it does nothing when set by opendir()). */ +- unsigned int keep_cache : 1; +- +- /** Indicates a flush operation. Set in flush operation, also +- maybe set in highlevel lock operation and lowlevel release +- operation. */ +- unsigned int flush : 1; +- +- /** Can be filled in by open, to indicate that the file is not +- seekable. 
*/ +- unsigned int nonseekable : 1; +- +- /* Indicates that flock locks for this file should be +- released. If set, lock_owner shall contain a valid value. +- May only be set in ->release(). */ +- unsigned int flock_release : 1; +- +- /** Can be filled in by opendir. It signals the kernel to +- enable caching of entries returned by readdir(). Has no +- effect when set in other contexts (in particular it does +- nothing when set by open()). */ +- unsigned int cache_readdir : 1; +- +- /** Padding. Reserved for future use*/ +- unsigned int padding : 25; +- unsigned int padding2 : 32; +- +- /** File handle id. May be filled in by filesystem in create, +- * open, and opendir(). Available in most other file operations on the +- * same file handle. */ +- uint64_t fh; +- +- /** Lock owner id. Available in locking operations and flush */ +- uint64_t lock_owner; +- +- /** Requested poll events. Available in ->poll. Only set on kernels +- which support it. If unsupported, this field is set to zero. */ +- uint32_t poll_events; ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /* ++ * In case of a write operation indicates if this was caused ++ * by a delayed write from the page cache. If so, then the ++ * context's pid, uid, and gid fields will not be valid, and ++ * the *fh* value may not match the *fh* value that would ++ * have been sent with the corresponding individual write ++ * requests if write caching had been disabled. ++ */ ++ unsigned int writepage:1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io:1; ++ ++ /* ++ * Can be filled in by open. It signals the kernel that any ++ * currently cached file data (ie., data that the filesystem ++ * provided the last time the file was open) need not be ++ * invalidated. Has no effect when set in other contexts (in ++ * particular it does nothing when set by opendir()). ++ */ ++ unsigned int keep_cache:1; ++ ++ /* ++ * Indicates a flush operation. 
Set in flush operation, also ++ * maybe set in highlevel lock operation and lowlevel release ++ * operation. ++ */ ++ unsigned int flush:1; ++ ++ /* ++ * Can be filled in by open, to indicate that the file is not ++ * seekable. ++ */ ++ unsigned int nonseekable:1; ++ ++ /* ++ * Indicates that flock locks for this file should be ++ * released. If set, lock_owner shall contain a valid value. ++ * May only be set in ->release(). ++ */ ++ unsigned int flock_release:1; ++ ++ /* ++ * Can be filled in by opendir. It signals the kernel to ++ * enable caching of entries returned by readdir(). Has no ++ * effect when set in other contexts (in particular it does ++ * nothing when set by open()). ++ */ ++ unsigned int cache_readdir:1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding:25; ++ unsigned int padding2:32; ++ ++ /* ++ * File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. ++ */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /* ++ * Requested poll events. Available in ->poll. Only set on kernels ++ * which support it. If unsupported, this field is set to zero. ++ */ ++ uint32_t poll_events; + }; + +-/************************************************************************** +- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * +- **************************************************************************/ ++/* ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' ++ */ + + /** + * Indicates that the filesystem supports asynchronous read requests. +@@ -110,7 +128,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_READ (1 << 0) ++#define FUSE_CAP_ASYNC_READ (1 << 0) + + /** + * Indicates that the filesystem supports "remote" locking. 
+@@ -118,7 +136,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel, + * and if getlk() and setlk() handlers are implemented. + */ +-#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) + + /** + * Indicates that the filesystem supports the O_TRUNC open flag. If +@@ -127,14 +145,14 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) + + /** + * Indicates that the filesystem supports lookups of "." and "..". + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) + + /** + * Indicates that the kernel should not apply the umask to the +@@ -142,7 +160,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_DONT_MASK (1 << 6) ++#define FUSE_CAP_DONT_MASK (1 << 6) + + /** + * Indicates that libfuse should try to use splice() when writing to +@@ -150,7 +168,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) + + /** + * Indicates that libfuse should try to move pages instead of copying when +@@ -158,7 +176,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) + + /** + * Indicates that libfuse should try to use splice() when reading from +@@ -167,7 +185,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a write_buf() handler. 
+ */ +-#define FUSE_CAP_SPLICE_READ (1 << 9) ++#define FUSE_CAP_SPLICE_READ (1 << 9) + + /** + * If set, the calls to flock(2) will be emulated using POSIX locks and must +@@ -180,14 +198,14 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a flock() handler. + */ +-#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) + + /** + * Indicates that the filesystem supports ioctl's on directories. + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_IOCTL_DIR (1 << 11) ++#define FUSE_CAP_IOCTL_DIR (1 << 11) + + /** + * Traditionally, while a file is open the FUSE kernel module only +@@ -209,7 +227,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) + + /** + * Indicates that the filesystem supports readdirplus. +@@ -217,7 +235,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and if the + * filesystem implements a readdirplus() handler. + */ +-#define FUSE_CAP_READDIRPLUS (1 << 13) ++#define FUSE_CAP_READDIRPLUS (1 << 13) + + /** + * Indicates that the filesystem supports adaptive readdirplus. +@@ -245,7 +263,7 @@ struct fuse_file_info { + * if the filesystem implements both a readdirplus() and a readdir() + * handler. + */ +-#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) + + /** + * Indicates that the filesystem supports asynchronous direct I/O submission. +@@ -256,7 +274,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_DIO (1 << 15) ++#define FUSE_CAP_ASYNC_DIO (1 << 15) + + /** + * Indicates that writeback caching should be enabled. 
This means that +@@ -265,7 +283,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) + + /** + * Indicates support for zero-message opens. If this flag is set in +@@ -278,7 +296,7 @@ struct fuse_file_info { + * Setting (or unsetting) this flag in the `want` field has *no + * effect*. + */ +-#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) + + /** + * Indicates support for parallel directory operations. If this flag +@@ -288,7 +306,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) + + /** + * Indicates support for POSIX ACLs. +@@ -307,7 +325,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_POSIX_ACL (1 << 19) ++#define FUSE_CAP_POSIX_ACL (1 << 19) + + /** + * Indicates that the filesystem is responsible for unsetting +@@ -316,7 +334,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) + + /** + * Indicates support for zero-message opendirs. If this flag is set in +@@ -328,7 +346,7 @@ struct fuse_file_info { + * + * Setting (or unsetting) this flag in the `want` field has *no effect*. 
+ */ +-#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) + + /** + * Ioctl flags +@@ -340,12 +358,12 @@ struct fuse_file_info { + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +-#define FUSE_IOCTL_COMPAT (1 << 0) +-#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +-#define FUSE_IOCTL_RETRY (1 << 2) +-#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) + +-#define FUSE_IOCTL_MAX_IOV 256 ++#define FUSE_IOCTL_MAX_IOV 256 + + /** + * Connection information, passed to the ->init() method +@@ -355,114 +373,114 @@ struct fuse_file_info { + * value must usually be smaller than the indicated value. + */ + struct fuse_conn_info { +- /** +- * Major version of the protocol (read-only) +- */ +- unsigned proto_major; +- +- /** +- * Minor version of the protocol (read-only) +- */ +- unsigned proto_minor; +- +- /** +- * Maximum size of the write buffer +- */ +- unsigned max_write; +- +- /** +- * Maximum size of read requests. A value of zero indicates no +- * limit. However, even if the filesystem does not specify a +- * limit, the maximum size of read requests will still be +- * limited by the kernel. +- * +- * NOTE: For the time being, the maximum size of read requests +- * must be set both here *and* passed to fuse_session_new() +- * using the ``-o max_read=`` mount option. At some point +- * in the future, specifying the mount option will no longer +- * be necessary. +- */ +- unsigned max_read; +- +- /** +- * Maximum readahead +- */ +- unsigned max_readahead; +- +- /** +- * Capability flags that the kernel supports (read-only) +- */ +- unsigned capable; +- +- /** +- * Capability flags that the filesystem wants to enable. +- * +- * libfuse attempts to initialize this field with +- * reasonable default values before calling the init() handler. 
+- */ +- unsigned want; +- +- /** +- * Maximum number of pending "background" requests. A +- * background request is any type of request for which the +- * total number is not limited by other means. As of kernel +- * 4.8, only two types of requests fall into this category: +- * +- * 1. Read-ahead requests +- * 2. Asynchronous direct I/O requests +- * +- * Read-ahead requests are generated (if max_readahead is +- * non-zero) by the kernel to preemptively fill its caches +- * when it anticipates that userspace will soon read more +- * data. +- * +- * Asynchronous direct I/O requests are generated if +- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large +- * direct I/O request. In this case the kernel will internally +- * split it up into multiple smaller requests and submit them +- * to the filesystem concurrently. +- * +- * Note that the following requests are *not* background +- * requests: writeback requests (limited by the kernel's +- * flusher algorithm), regular (i.e., synchronous and +- * buffered) userspace read/write requests (limited to one per +- * thread), asynchronous read requests (Linux's io_submit(2) +- * call actually blocks, so these are also limited to one per +- * thread). +- */ +- unsigned max_background; +- +- /** +- * Kernel congestion threshold parameter. If the number of pending +- * background requests exceeds this number, the FUSE kernel module will +- * mark the filesystem as "congested". This instructs the kernel to +- * expect that queued requests will take some time to complete, and to +- * adjust its algorithms accordingly (e.g. by putting a waiting thread +- * to sleep instead of using a busy-loop). +- */ +- unsigned congestion_threshold; +- +- /** +- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible +- * for updating mtime and ctime when write requests are received. The +- * updated values are passed to the filesystem with setattr() requests. 
+- * However, if the filesystem does not support the full resolution of +- * the kernel timestamps (nanoseconds), the mtime and ctime values used +- * by kernel and filesystem will differ (and result in an apparent +- * change of times after a cache flush). +- * +- * To prevent this problem, this variable can be used to inform the +- * kernel about the timestamp granularity supported by the file-system. +- * The value should be power of 10. The default is 1, i.e. full +- * nano-second resolution. Filesystems supporting only second resolution +- * should set this to 1000000000. +- */ +- unsigned time_gran; +- +- /** +- * For future use. +- */ +- unsigned reserved[22]; ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. ++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. 
A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; + }; + + struct fuse_session; +@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; + * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want + * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want + * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want +- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock +- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want +- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want +- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want +- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want +- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want +- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want +- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want +- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want +- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want +- * -o time_gran=N sets conn->time_gran ++ * -o no_remote_lock Equivalent to -o ++ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets ++ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets ++ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets ++ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets ++ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets ++ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets ++ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets ++ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets ++ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o ++ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO ++ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in ++ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in ++ *conn->want -o time_gran=N sets conn->time_gran + * + * Known options will be removed from *args*, unknown options will be + * passed through unchanged. +@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; + * @param args argument vector (input+output) + * @return parsed options + **/ +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); + + /** + * This function applies the (parsed) parameters in *opts* to the +@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); + * option has been explicitly set. 
+ */ + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn); ++ struct fuse_conn_info *conn); + + /** + * Go into the background +@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); + */ + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); + +-/* ----------------------------------------------------------- * +- * Data buffer * +- * ----------------------------------------------------------- */ ++/* ++ * Data buffer ++ */ + + /** + * Buffer flags + */ + enum fuse_buf_flags { +- /** +- * Buffer contains a file descriptor +- * +- * If this flag is set, the .fd field is valid, otherwise the +- * .mem fields is valid. +- */ +- FUSE_BUF_IS_FD = (1 << 1), +- +- /** +- * Seek on the file descriptor +- * +- * If this flag is set then the .pos field is valid and is +- * used to seek to the given offset before performing +- * operation on file descriptor. +- */ +- FUSE_BUF_FD_SEEK = (1 << 2), +- +- /** +- * Retry operation on file descriptor +- * +- * If this flag is set then retry operation on file descriptor +- * until .size bytes have been copied or an error or EOF is +- * detected. +- */ +- FUSE_BUF_FD_RETRY = (1 << 3), ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem fields is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. 
++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), + }; + + /** + * Buffer copy flags + */ + enum fuse_buf_copy_flags { +- /** +- * Don't use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable. ++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. 
++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), + }; + + /** +@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { + * be supplied as a memory pointer or as a file descriptor + */ + struct fuse_buf { +- /** +- * Size of data in bytes +- */ +- size_t size; +- +- /** +- * Buffer flags +- */ +- enum fuse_buf_flags flags; +- +- /** +- * Memory pointer +- * +- * Used unless FUSE_BUF_IS_FD flag is set. +- */ +- void *mem; +- +- /** +- * File descriptor +- * +- * Used if FUSE_BUF_IS_FD flag is set. +- */ +- int fd; +- +- /** +- * File position +- * +- * Used if FUSE_BUF_FD_SEEK flag is set. +- */ +- off_t pos; ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; + }; + + /** +@@ -677,41 +694,39 @@ struct fuse_buf { + * Allocate dynamically to add more than one buffer. 
+ */ + struct fuse_bufvec { +- /** +- * Number of buffers in the array +- */ +- size_t count; +- +- /** +- * Index of current buffer within the array +- */ +- size_t idx; +- +- /** +- * Current offset within the current buffer +- */ +- size_t off; +- +- /** +- * Array of buffers +- */ +- struct fuse_buf buf[1]; ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; + }; + + /* Initialize bufvec with a single buffer of given size */ +-#define FUSE_BUFVEC_INIT(size__) \ +- ((struct fuse_bufvec) { \ +- /* .count= */ 1, \ +- /* .idx = */ 0, \ +- /* .off = */ 0, \ +- /* .buf = */ { /* [0] = */ { \ +- /* .size = */ (size__), \ +- /* .flags = */ (enum fuse_buf_flags) 0, \ +- /* .mem = */ NULL, \ +- /* .fd = */ -1, \ +- /* .pos = */ 0, \ +- } } \ +- } ) ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec){ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, /* .buf = */ \ ++ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags)0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } }) + + /** + * Get total size of data in a fuse buffer vector +@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * @return actual number of bytes copied or -errno on error + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + +-/* ----------------------------------------------------------- * +- * Signal handling * +- * ----------------------------------------------------------- */ ++/* ++ * Signal handling ++ */ + + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +- * Stores session in a global variable. 
May only be called once per ++ * Stores session in a global variable. May only be called once per + * process until fuse_remove_signal_handlers() is called. + * + * Once either of the POSIX signals arrives, the signal handler calls +@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); + */ + void fuse_remove_signal_handlers(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Compatibility stuff * +- * ----------------------------------------------------------- */ ++/* ++ * Compatibility stuff ++ */ + + #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 +-# error only API version 30 or greater is supported ++#error only API version 30 or greater is supported + #endif + + +@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! + */ + +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); + #else +-struct _fuse_off_t_must_be_64bit_dummy_struct \ +- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++struct _fuse_off_t_must_be_64bit_dummy_struct { ++ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); ++}; + #endif + + #endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index b39522e..e63cb58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -1,71 +1,71 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. 
+- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "fuse.h" + #include "fuse_lowlevel.h" + + struct fuse_req { +- struct fuse_session *se; +- uint64_t unique; +- int ctr; +- pthread_mutex_t lock; +- struct fuse_ctx ctx; +- struct fuse_chan *ch; +- int interrupted; +- unsigned int ioctl_64bit : 1; +- union { +- struct { +- uint64_t unique; +- } i; +- struct { +- fuse_interrupt_func_t func; +- void *data; +- } ni; +- } u; +- struct fuse_req *next; +- struct fuse_req *prev; ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit:1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; + }; + + struct fuse_notify_req { +- uint64_t unique; +- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, +- const void *, const struct fuse_buf *); +- struct fuse_notify_req *next; +- struct fuse_notify_req *prev; ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; + }; + + struct fuse_session { +- char *mountpoint; +- volatile int exited; +- int fd; +- int debug; +- int deny_others; +- struct fuse_lowlevel_ops op; +- int got_init; +- struct cuse_data *cuse_data; +- void *userdata; +- uid_t owner; +- struct fuse_conn_info conn; +- struct fuse_req list; +- struct fuse_req interrupts; +- pthread_mutex_t lock; +- int got_destroy; +- int broken_splice_nonblock; +- uint64_t notify_ctr; +- struct fuse_notify_req notify_list; +- size_t bufsize; +- int error; ++ char *mountpoint; ++ volatile int exited; ++ int 
fd; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; + }; + + struct fuse_chan { +- pthread_mutex_t lock; +- int ctr; +- int fd; ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; + }; + + /** +@@ -76,19 +76,20 @@ struct fuse_chan { + * + */ + struct fuse_module { +- char *name; +- fuse_module_factory_t factory; +- struct fuse_module *next; +- struct fusemod_so *so; +- int ctr; ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; + }; + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count); ++ int count); + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch); ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch); + + + #define FUSE_MAX_MAX_PAGES 256 +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 0d268ab..11345f9 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -1,40 +1,40 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- Logging API. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * Logging API. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include "fuse_log.h" + + #include + #include + +-static void default_log_func( +- __attribute__(( unused )) enum fuse_log_level level, +- const char *fmt, va_list ap) ++static void default_log_func(__attribute__((unused)) enum fuse_log_level level, ++ const char *fmt, va_list ap) + { +- vfprintf(stderr, fmt, ap); ++ vfprintf(stderr, fmt, ap); + } + + static fuse_log_func_t log_func = default_log_func; + + void fuse_set_log_func(fuse_log_func_t func) + { +- if (!func) +- func = default_log_func; ++ if (!func) { ++ func = default_log_func; ++ } + +- log_func = func; ++ log_func = func; + } + + void fuse_log(enum fuse_log_level level, const char *fmt, ...) + { +- va_list ap; ++ va_list ap; + +- va_start(ap, fmt); +- log_func(level, fmt, ap); +- va_end(ap); ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); + } +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 0af700d..bf6c11f 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOG_H_ + #define FUSE_LOG_H_ +@@ -22,14 +22,14 @@ + * These levels correspond to syslog(2) log levels since they are widely used. 
+ */ + enum fuse_log_level { +- FUSE_LOG_EMERG, +- FUSE_LOG_ALERT, +- FUSE_LOG_CRIT, +- FUSE_LOG_ERR, +- FUSE_LOG_WARNING, +- FUSE_LOG_NOTICE, +- FUSE_LOG_INFO, +- FUSE_LOG_DEBUG ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG + }; + + /** +@@ -45,8 +45,8 @@ enum fuse_log_level { + * @param fmt sprintf-style format string including newline + * @param ap format string arguments + */ +-typedef void (*fuse_log_func_t)(enum fuse_log_level level, +- const char *fmt, va_list ap); ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, ++ va_list ap); + + /** + * Install a custom log handler function. +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e6fa247..5c9cb52 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1,2380 +1,2515 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of (most of) the low-level FUSE API. The session loop +- functions are implemented in separate files. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of (most of) the low-level FUSE API. The session loop ++ * functions are implemented in separate files. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_kernel.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" ++#include "fuse_opt.h" + ++#include ++#include ++#include ++#include + #include + #include +-#include + #include +-#include +-#include +-#include +-#include + #include +- ++#include + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) ({ \ +- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ +- (type *)( (char *)__mptr - offsetof(type,member) );}) ++#define container_of(ptr, type, member) \ ++ ({ \ ++ const typeof(((type *)0)->member) *__mptr = (ptr); \ ++ (type *)((char *)__mptr - offsetof(type, member)); \ ++ }) + + struct fuse_pollhandle { +- uint64_t kh; +- struct fuse_session *se; ++ uint64_t kh; ++ struct fuse_session *se; + }; + + static size_t pagesize; + + static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + { +- pagesize = getpagesize(); ++ pagesize = getpagesize(); + } + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ 
attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) + { +- stbuf->st_mode = attr->mode; +- stbuf->st_uid = attr->uid; +- stbuf->st_gid = attr->gid; +- stbuf->st_size = attr->size; +- stbuf->st_atime = attr->atime; +- stbuf->st_mtime = attr->mtime; +- stbuf->st_ctime = attr->ctime; +- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); +- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); +- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); + } + +-static size_t iov_length(const struct iovec *iov, size_t count) ++static size_t iov_length(const struct iovec *iov, size_t count) + { +- size_t seg; +- size_t ret = 0; ++ size_t seg; ++ size_t ret = 0; + +- for (seg = 0; seg < count; seg++) +- ret += iov[seg].iov_len; +- return ret; ++ for (seg = 0; seg < count; seg++) { ++ ret += iov[seg].iov_len; ++ } ++ return ret; + } + + static void list_init_req(struct fuse_req *req) + { +- req->next = req; +- req->prev = req; ++ req->next = req; ++ req->prev = req; + } + + static void list_del_req(struct fuse_req *req) + { +- struct fuse_req *prev = req->prev; +- struct fuse_req *next = req->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; + } + + static void list_add_req(struct fuse_req *req, struct fuse_req *next) + { +- struct fuse_req *prev = next->prev; +- req->next = next; 
+- req->prev = prev; +- prev->next = req; +- next->prev = req; ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; + } + + static void destroy_req(fuse_req_t req) + { +- pthread_mutex_destroy(&req->lock); +- free(req); ++ pthread_mutex_destroy(&req->lock); ++ free(req); + } + + void fuse_free_req(fuse_req_t req) + { +- int ctr; +- struct fuse_session *se = req->se; ++ int ctr; ++ struct fuse_session *se = req->se; + +- pthread_mutex_lock(&se->lock); +- req->u.ni.func = NULL; +- req->u.ni.data = NULL; +- list_del_req(req); +- ctr = --req->ctr; +- req->ch = NULL; +- pthread_mutex_unlock(&se->lock); +- if (!ctr) +- destroy_req(req); ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) { ++ destroy_req(req); ++ } + } + + static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) + { +- struct fuse_req *req; ++ struct fuse_req *req; + +- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); +- if (req == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); +- } else { +- req->se = se; +- req->ctr = 1; +- list_init_req(req); +- fuse_mutex_init(&req->lock); +- } ++ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } + +- return req; ++ return req; + } + + /* Send data. 
If *ch* is NULL, send via session master fd */ + static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_out_header *out = iov[0].iov_base; + +- out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", +- out->error, out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long) out->unique, out->error, +- strerror(-out->error), out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i\n", +- (unsigned long long) out->unique, out->len); +- } +- } ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); ++ } ++ } + +- abort(); /* virtio should have taken it before here */ +- return 0; ++ abort(); /* virtio should have taken it before here */ ++ return 0; + } + + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (error <= -1000 || error > 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; +- } ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } + +- out.unique = req->unique; +- out.error = error; ++ out.unique = req->unique; ++ out.error = error; + +- 
iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(req->se, req->ch, iov, count); ++ return fuse_send_msg(req->se, req->ch, iov, count); + } + + static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- int res; ++ int res; + +- res = fuse_send_reply_iov_nofree(req, error, iov, count); +- fuse_free_req(req); +- return res; ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; + } + + static int send_reply(fuse_req_t req, int error, const void *arg, +- size_t argsize) ++ size_t argsize) + { +- struct iovec iov[2]; +- int count = 1; +- if (argsize) { +- iov[1].iov_base = (void *) arg; +- iov[1].iov_len = argsize; +- count++; +- } +- return send_reply_iov(req, error, iov, count); ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *)arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); + } + + int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) + { +- int res; +- struct iovec *padded_iov; ++ int res; ++ struct iovec *padded_iov; + +- padded_iov = malloc((count + 1) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); +- count++; ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; + +- res = send_reply_iov(req, 0, padded_iov, count); +- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); + +- return res; ++ return res; + } + + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * 'buf` is allowed to be empty 
so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, off_t off) ++ const char *name, const struct stat *stbuf, off_t off) + { +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- struct fuse_dirent *dirent; ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; + +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); + +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ return entlen_padded; ++ } + +- dirent = (struct fuse_dirent*) buf; +- dirent->ino = stbuf->st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ dirent = (struct fuse_dirent *)buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); + +- return entlen_padded; ++ return entlen_padded; + } + + static void convert_statfs(const struct statvfs *stbuf, +- struct fuse_kstatfs *kstatfs) ++ struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = 
stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; + } + + static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) + { +- return send_reply(req, 0, arg, argsize); ++ return send_reply(req, 0, arg, argsize); + } + + int fuse_reply_err(fuse_req_t req, int err) + { +- return send_reply(req, -err, NULL, 0); ++ return send_reply(req, -err, NULL, 0); + } + + void fuse_reply_none(fuse_req_t req) + { +- fuse_free_req(req); ++ fuse_free_req(req); + } + + static unsigned long calc_timeout_sec(double t) + { +- if (t > (double) ULONG_MAX) +- return ULONG_MAX; +- else if (t < 0.0) +- return 0; +- else +- return (unsigned long) t; ++ if (t > (double)ULONG_MAX) { ++ return ULONG_MAX; ++ } else if (t < 0.0) { ++ return 0; ++ } else { ++ return (unsigned long)t; ++ } + } + + static unsigned int calc_timeout_nsec(double t) + { +- double f = t - (double) calc_timeout_sec(t); +- if (f < 0.0) +- return 0; +- else if (f >= 0.999999999) +- return 999999999; +- else +- return (unsigned int) (f * 1.0e9); ++ double f = t - (double)calc_timeout_sec(t); ++ if (f < 0.0) { ++ return 0; ++ } else if (f >= 0.999999999) { ++ return 999999999; ++ } else { ++ return (unsigned int)(f * 1.0e9); ++ } + } + + static void fill_entry(struct fuse_entry_out *arg, +- const struct fuse_entry_param *e) ++ const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); +- convert_stat(&e->attr, &arg->attr); ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); + } + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * `buf` is allowed to be empty so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off) +-{ +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; +- +- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; +- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); +- fill_entry(&dp->entry_out, e); +- +- struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); +- +- return entlen_padded; +-} +- +-static void fill_open(struct fuse_open_out *arg, +- const struct fuse_file_info *f) +-{ +- arg->fh = f->fh; +- if (f->direct_io) +- arg->open_flags |= FOPEN_DIRECT_IO; +- if (f->keep_cache) +- arg->open_flags |= FOPEN_KEEP_CACHE; +- if (f->cache_readdir) +- arg->open_flags |= FOPEN_CACHE_DIR; +- if (f->nonseekable) +- arg->open_flags |= FOPEN_NONSEEKABLE; ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ 
return entlen_padded; ++ } ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) { ++ arg->open_flags |= FOPEN_DIRECT_IO; ++ } ++ if (f->keep_cache) { ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ } ++ if (f->cache_readdir) { ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ } ++ if (f->nonseekable) { ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++ } + } + + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { +- struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(arg); + +- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- negative entry */ +- if (!e->ino && req->se->conn.proto_minor < 4) +- return fuse_reply_err(req, ENOENT); ++ /* ++ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ * negative entry ++ */ ++ if (!e->ino && req->se->conn.proto_minor < 4) { ++ return fuse_reply_err(req, ENOENT); ++ } + +- memset(&arg, 0, sizeof(arg)); +- fill_entry(&arg, e); +- return send_reply_ok(req, &arg, size); ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *f) ++ const struct fuse_file_info *f) + { +- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); +- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; +- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +- memset(buf, 0, sizeof(buf)); +- fill_entry(earg, e); +- fill_open(oarg, f); +- return send_reply_ok(req, buf, +- entrysize + sizeof(struct fuse_open_out)); ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); + } + + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout) ++ double attr_timeout) + { +- struct fuse_attr_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? 
+- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ struct fuse_attr_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- arg.attr_valid = calc_timeout_sec(attr_timeout); +- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); +- convert_stat(attr, &arg.attr); ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_readlink(fuse_req_t req, const char *linkname) + { +- return send_reply_ok(req, linkname, strlen(linkname)); ++ return send_reply_ok(req, linkname, strlen(linkname)); + } + + int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) + { +- struct fuse_open_out arg; ++ struct fuse_open_out arg; + +- memset(&arg, 0, sizeof(arg)); +- fill_open(&arg, f); +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_write(fuse_req_t req, size_t count) + { +- struct fuse_write_out arg; ++ struct fuse_write_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) + { +- return send_reply_ok(req, buf, size); ++ return send_reply_ok(req, buf, size); + } + + static int fuse_send_data_iov_fallback(struct fuse_session *se, +- struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, +- size_t len) ++ struct fuse_chan *ch, struct iovec *iov, ++ int iov_count, struct fuse_bufvec *buf, ++ size_t len) + { +- /* Optimize common case */ +- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && +- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +- /* FIXME: also avoid memory copy if there are multiple buffers +- but none of them contain an fd */ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* ++ * FIXME: also avoid memory copy if there are multiple buffers ++ * but none of them contain an fd ++ */ + +- iov[iov_count].iov_base = buf->buf[0].mem; +- iov[iov_count].iov_len = len; +- iov_count++; +- return fuse_send_msg(se, ch, iov, iov_count); +- } ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } + +- abort(); /* Will have taken vhost path */ +- return 0; ++ abort(); /* Will have taken vhost path */ ++ return 0; + } + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) + { +- size_t len = fuse_buf_size(buf); +- (void) flags; ++ size_t len = fuse_buf_size(buf); ++ (void)flags; + +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- struct iovec iov[2]; +- struct fuse_out_header out; +- int res; ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; ++ out.unique = req->unique; ++ out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); +- if (res <= 0) { +- fuse_free_req(req); +- return 
res; +- } else { +- return fuse_reply_err(req, res); +- } ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } + } + + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { +- struct fuse_statfs_out arg; +- size_t size = req->se->conn.proto_minor < 4 ? +- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ struct fuse_statfs_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- convert_statfs(stbuf, &arg.st); ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_xattr(fuse_req_t req, size_t count) + { +- struct fuse_getxattr_out arg; ++ struct fuse_getxattr_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lock(fuse_req_t req, const struct flock *lock) + { +- struct fuse_lk_out arg; ++ struct fuse_lk_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.lk.type = lock->l_type; +- if (lock->l_type != F_UNLCK) { +- arg.lk.start = lock->l_start; +- if (lock->l_len == 0) +- arg.lk.end = OFFSET_MAX; +- else +- arg.lk.end = lock->l_start + lock->l_len - 1; +- } +- arg.lk.pid = lock->l_pid; +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) { ++ arg.lk.end = OFFSET_MAX; ++ } else { ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_bmap(fuse_req_t req, uint64_t idx) + { +- struct fuse_bmap_out 
arg; ++ struct fuse_bmap_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.block = idx; ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, +- size_t count) +-{ +- struct fuse_ioctl_iovec *fiov; +- size_t i; +- +- fiov = malloc(sizeof(fiov[0]) * count); +- if (!fiov) +- return NULL; +- +- for (i = 0; i < count; i++) { +- fiov[i].base = (uintptr_t) iov[i].iov_base; +- fiov[i].len = iov[i].iov_len; +- } +- +- return fiov; +-} +- +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count) +-{ +- struct fuse_ioctl_out arg; +- struct fuse_ioctl_iovec *in_fiov = NULL; +- struct fuse_ioctl_iovec *out_fiov = NULL; +- struct iovec iov[4]; +- size_t count = 1; +- int res; +- +- memset(&arg, 0, sizeof(arg)); +- arg.flags |= FUSE_IOCTL_RETRY; +- arg.in_iovs = in_count; +- arg.out_iovs = out_count; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; +- +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } +- +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; +- } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) +- goto enomem; +- +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; +- } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) +- goto enomem; +- +- iov[count].iov_base = 
(void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } +- } +- +- res = send_reply_iov(req, 0, iov, count); ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) { ++ return NULL; ++ } ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t)iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); + out: +- 
free(in_fiov); +- free(out_fiov); ++ free(in_fiov); ++ free(out_fiov); + +- return res; ++ return res; + + enomem: +- res = fuse_reply_err(req, ENOMEM); +- goto out; ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; + } + + int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) + { +- struct fuse_ioctl_out arg; +- struct iovec iov[3]; +- size_t count = 1; ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; + +- if (size) { +- iov[count].iov_base = (char *) buf; +- iov[count].iov_len = size; +- count++; +- } ++ if (size) { ++ iov[count].iov_base = (char *)buf; ++ iov[count].iov_len = size; ++ count++; ++ } + +- return send_reply_iov(req, 0, iov, count); ++ return send_reply_iov(req, 0, iov, count); + } + + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count) ++ int count) + { +- struct iovec *padded_iov; +- struct fuse_ioctl_out arg; +- int res; ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; + +- padded_iov = malloc((count + 2) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- padded_iov[1].iov_base = &arg; +- padded_iov[1].iov_len = sizeof(arg); ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); + +- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); + +- res = send_reply_iov(req, 0, padded_iov, count + 2); 
+- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); + +- return res; ++ return res; + } + + int fuse_reply_poll(fuse_req_t req, unsigned revents) + { +- struct fuse_poll_out arg; ++ struct fuse_poll_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.revents = revents; ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lseek(fuse_req_t req, off_t off) + { +- struct fuse_lseek_out arg; ++ struct fuse_lseek_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.offset = off; ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.lookup) +- req->se->op.lookup(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lookup) { ++ req->se->op.lookup(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; + +- if (req->se->op.forget) +- req->se->op.forget(req, nodeid, arg->nlookup); +- else +- fuse_reply_none(req); ++ if (req->se->op.forget) { ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg) ++ const void *inarg) + { +- struct fuse_batch_forget_in *arg = (void *) inarg; +- struct fuse_forget_one *param = (void *) PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg = (void *)inarg; ++ struct fuse_forget_one *param = (void *)PARAM(arg); ++ 
unsigned int i; + +- (void) nodeid; ++ (void)nodeid; + +- if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *) param); +- } else if (req->se->op.forget) { +- for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = &param[i]; +- struct fuse_req *dummy_req; ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *)param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = &param[i]; ++ struct fuse_req *dummy_req; + +- dummy_req = fuse_ll_alloc_req(req->se); +- if (dummy_req == NULL) +- break; ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req == NULL) { ++ break; ++ } + +- dummy_req->unique = req->unique; +- dummy_req->ctx = req->ctx; +- dummy_req->ch = NULL; ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, +- forget->nlookup); +- } +- fuse_reply_none(req); +- } else { +- fuse_reply_none(req); +- } ++ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_file_info *fip = NULL; +- struct fuse_file_info fi; ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } + +- if (req->se->op.getattr) +- req->se->op.getattr(req, nodeid, fip); +- else +- 
fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getattr) { ++ req->se->op.getattr(req, nodeid, fip); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; +- +- if (req->se->op.setattr) { +- struct fuse_file_info *fi = NULL; +- struct fuse_file_info fi_store; +- struct stat stbuf; +- memset(&stbuf, 0, sizeof(stbuf)); +- convert_attr(arg, &stbuf); +- if (arg->valid & FATTR_FH) { +- arg->valid &= ~FATTR_FH; +- memset(&fi_store, 0, sizeof(fi_store)); +- fi = &fi_store; +- fi->fh = arg->fh; +- } +- arg->valid &= +- FUSE_SET_ATTR_MODE | +- FUSE_SET_ATTR_UID | +- FUSE_SET_ATTR_GID | +- FUSE_SET_ATTR_SIZE | +- FUSE_SET_ATTR_ATIME | +- FUSE_SET_ATTR_MTIME | +- FUSE_SET_ATTR_ATIME_NOW | +- FUSE_SET_ATTR_MTIME_NOW | +- FUSE_SET_ATTR_CTIME; +- +- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; + +- if (req->se->op.access) +- 
req->se->op.access(req, nodeid, arg->mask); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.access) { ++ req->se->op.access(req, nodeid, arg->mask); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) inarg; ++ (void)inarg; + +- if (req->se->op.readlink) +- req->se->op.readlink(req, nodeid); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readlink) { ++ req->se->op.readlink(req, nodeid); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; ++ char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ } + +- if (req->se->op.mknod) +- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mknod) { ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } + +- if (req->se->op.mkdir) +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mkdir) { ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ } else { ++ fuse_reply_err(req, ENOSYS); 
++ } + } + + static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.unlink) +- req->se->op.unlink(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.unlink) { ++ req->se->op.unlink(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.rmdir) +- req->se->op.rmdir(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rmdir) { ++ req->se->op.rmdir(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; +- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ char *name = (char *)inarg; ++ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; + +- if (req->se->op.symlink) +- req->se->op.symlink(req, linkname, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.symlink) { ++ req->se->op.symlink(req, linkname, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- 0); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; + +- if (req->se->op.link) +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.link) { ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; + +- if (req->se->op.create) { +- struct fuse_file_info fi; +- char *name = PARAM(arg); ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + sizeof(struct fuse_open_in); ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + sizeof(struct fuse_open_in); ++ } + +- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
+- } else +- fuse_reply_err(req, ENOSYS); ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.open) +- req->se->op.open(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.open) { ++ req->se->op.open(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; + +- if (req->se->op.read) { +- struct fuse_file_info fi; ++ if (req->se->op.read) { ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } +- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- char *param; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ char *param; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } + +- if (req->se->op.write) +- req->se->op.write(req, nodeid, param, arg->size, +- arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.write) { ++ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; +- +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- } +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- bufv.buf[0].size = arg->size; +- +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ 
const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) +- fi.lock_owner = arg->lock_owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) { ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.flush) +- req->se->op.flush(req, nodeid, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.flush) { ++ req->se->op.flush(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } +- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { +- fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; +- } ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.release) +- req->se->op.release(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.release) { ++ req->se->op.release(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsync) +- req->se->op.fsync(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsync) { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.opendir) +- req->se->op.opendir(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.opendir) { ++ req->se->op.opendir(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdir) +- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdir) { ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdirplus) +- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdirplus) { ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct 
fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; + +- if (req->se->op.releasedir) +- req->se->op.releasedir(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.releasedir) { ++ req->se->op.releasedir(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsyncdir) +- req->se->op.fsyncdir(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsyncdir) { ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- if (req->se->op.statfs) +- req->se->op.statfs(req, nodeid); +- else { +- struct statvfs buf = { +- .f_namemax = 255, +- .f_bsize = 512, +- }; +- fuse_reply_statfs(req, &buf); +- } ++ if (req->se->op.statfs) { ++ req->se->op.statfs(req, nodeid); ++ } else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } + } + + static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; + +- if (req->se->op.setxattr) +- req->se->op.setxattr(req, nodeid, name, value, arg->size, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.setxattr) { ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.getxattr) +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getxattr) { ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.listxattr) +- req->se->op.listxattr(req, nodeid, arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.listxattr) { ++ req->se->op.listxattr(req, nodeid, arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.removexattr) +- req->se->op.removexattr(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.removexattr) { ++ req->se->op.removexattr(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void convert_fuse_file_lock(struct fuse_file_lock *fl, +- struct flock *flock) ++ struct flock *flock) + { +- memset(flock, 0, sizeof(struct flock)); +- flock->l_type = fl->type; +- flock->l_whence = SEEK_SET; +- flock->l_start = fl->start; +- if (fl->end == OFFSET_MAX) +- 
flock->l_len = 0; +- else +- flock->l_len = fl->end - fl->start + 1; +- flock->l_pid = fl->pid; ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) { ++ flock->l_len = 0; ++ } else { ++ flock->l_len = fl->end - fl->start + 1; ++ } ++ flock->l_pid = fl->pid; + } + + static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; + +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.getlk) +- req->se->op.getlk(req, nodeid, &fi, &flock); +- else +- fuse_reply_err(req, ENOSYS); ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) { ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) +-{ +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; +- +- if (arg->lk_flags & FUSE_LK_FLOCK) { +- int op = 0; +- +- switch (arg->lk.type) { +- case F_RDLCK: +- op = LOCK_SH; +- break; +- case F_WRLCK: +- op = LOCK_EX; +- break; +- case F_UNLCK: +- op = LOCK_UN; +- break; +- } +- if (!sleep) +- op |= LOCK_NB; +- +- if (req->se->op.flock) +- req->se->op.flock(req, nodeid, &fi, op); +- else +- fuse_reply_err(req, ENOSYS); +- } else { +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.setlk) +- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); +- else +- 
fuse_reply_err(req, ENOSYS); +- } ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) { ++ op |= LOCK_NB; ++ } ++ ++ if (req->se->op.flock) { ++ req->se->op.flock(req, nodeid, &fi, op); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) { ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } + } + + static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, inarg, 0); + } + + static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, inarg, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + { +- struct fuse_req *curr; +- +- for (curr = se->list.next; curr != &se->list; curr = curr->next) { +- if (curr->unique == req->u.i.unique) { +- fuse_interrupt_func_t func; +- void *data; +- +- curr->ctr++; +- pthread_mutex_unlock(&se->lock); +- +- /* Ugh, ugly locking */ +- pthread_mutex_lock(&curr->lock); +- pthread_mutex_lock(&se->lock); +- curr->interrupted = 1; +- func = curr->u.ni.func; +- data = curr->u.ni.data; +- pthread_mutex_unlock(&se->lock); +- if (func) +- func(curr, data); +- pthread_mutex_unlock(&curr->lock); +- +- pthread_mutex_lock(&se->lock); +- curr->ctr--; +- if (!curr->ctr) +- destroy_req(curr); +- +- return 1; +- } +- } +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr 
= curr->next) { +- if (curr->u.i.unique == req->u.i.unique) +- return 1; +- } +- return 0; ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) { ++ func(curr, data); ++ } ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) { ++ destroy_req(curr); ++ } ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) { ++ return 1; ++ } ++ } ++ return 0; + } + + static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; +- struct fuse_session *se = req->se; ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long) arg->unique); ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); ++ } + +- req->u.i.unique = arg->unique; ++ req->u.i.unique = arg->unique; + +- pthread_mutex_lock(&se->lock); +- if (find_interrupted(se, req)) +- destroy_req(req); +- else +- list_add_req(req, &se->interrupts); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) { ++ destroy_req(req); ++ } else { ++ list_add_req(req, &se->interrupts); ++ } ++ pthread_mutex_unlock(&se->lock); + } + + static struct fuse_req *check_interrupt(struct fuse_session *se, +- struct 
fuse_req *req) +-{ +- struct fuse_req *curr; +- +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr = curr->next) { +- if (curr->u.i.unique == req->unique) { +- req->interrupted = 1; +- list_del_req(curr); +- free(curr); +- return NULL; +- } +- } +- curr = se->interrupts.next; +- if (curr != &se->interrupts) { +- list_del_req(curr); +- list_init_req(curr); +- return curr; +- } else +- return NULL; ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else { ++ return NULL; ++ } + } + + static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; + +- if (req->se->op.bmap) +- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.bmap) { ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; +- struct fuse_file_info fi; ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; + +- if (flags & FUSE_IOCTL_DIR && +- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { +- fuse_reply_err(req, ENOTTY); +- return; +- } ++ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { +- req->ioctl_64bit = 1; +- } ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } + +- if (req->se->op.ioctl) +- req->se->op.ioctl(req, nodeid, arg->cmd, +- (void *)(uintptr_t)arg->arg, &fi, flags, +- in_buf, arg->in_size, arg->out_size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.ioctl) { ++ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, ++ &fi, flags, in_buf, arg->in_size, arg->out_size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + { +- free(ph); ++ free(ph); + } + + static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.poll_events = arg->events; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; + +- if (req->se->op.poll) { +- struct fuse_pollhandle *ph = NULL; ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; + +- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { +- ph = malloc(sizeof(struct fuse_pollhandle)); +- if (ph == NULL) { +- fuse_reply_err(req, ENOMEM); +- return; +- } +- ph->kh = arg->kh; +- ph->se = req->se; +- } ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ 
ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } + +- req->se->op.poll(req, nodeid, &fi, ph); +- } else { +- fuse_reply_err(req, ENOSYS); +- } ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fallocate) +- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fallocate) { ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, ++ &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + +-static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, ++ const void *inarg) + { +- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; +- struct fuse_file_info fi_in, fi_out; ++ struct fuse_copy_file_range_in *arg = ++ (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_file_info fi_in, fi_out; + +- memset(&fi_in, 0, sizeof(fi_in)); +- fi_in.fh = arg->fh_in; ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; + +- memset(&fi_out, 0, sizeof(fi_out)); +- fi_out.fh = arg->fh_out; ++ memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; + + +- if (req->se->op.copy_file_range) +- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, +- &fi_in, arg->nodeid_out, +- arg->off_out, &fi_out, arg->len, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if 
(req->se->op.copy_file_range) { ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, ++ arg->nodeid_out, arg->off_out, &fi_out, ++ arg->len, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.lseek) +- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lseek) { ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; +- struct fuse_init_out outarg; +- struct fuse_session *se = req->se; +- size_t bufsize = se->bufsize; +- size_t outargsize = sizeof(outarg); +- +- (void) nodeid; +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } +- } +- se->conn.proto_major = arg->major; +- se->conn.proto_minor = arg->minor; +- se->conn.capable = 0; +- se->conn.want = 0; +- +- memset(&outarg, 0, sizeof(outarg)); +- outarg.major = FUSE_KERNEL_VERSION; +- outarg.minor = FUSE_KERNEL_MINOR_VERSION; +- +- if (arg->major < 7) { +- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", +- arg->major, arg->minor); +- fuse_reply_err(req, EPROTO); +- return; +- } +- +- if (arg->major > 7) { +- /* Wait for a second INIT request with a 7.X version */ +- send_reply_ok(req, 
&outarg, sizeof(outarg)); +- return; +- } +- +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) +- se->conn.max_readahead = arg->max_readahead; +- if (arg->flags & FUSE_ASYNC_READ) +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- if (arg->flags & FUSE_POSIX_LOCKS) +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- if (arg->flags & FUSE_EXPORT_SUPPORT) +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- if (arg->flags & FUSE_DONT_MASK) +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- if (arg->flags & FUSE_FLOCK_LOCKS) +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- if (arg->flags & FUSE_AUTO_INVAL_DATA) +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- if (arg->flags & FUSE_DO_READDIRPLUS) +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- if (arg->flags & FUSE_READDIRPLUS_AUTO) +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- if (arg->flags & FUSE_ASYNC_DIO) +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- if (arg->flags & FUSE_WRITEBACK_CACHE) +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- if (arg->flags & FUSE_PARALLEL_DIROPS) +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- if (arg->flags & FUSE_POSIX_ACL) +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- if (arg->flags & FUSE_HANDLE_KILLPRIV) +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +- + FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } +- } +- } else { +- se->conn.max_readahead = 0; +- } +- +- if (se->conn.proto_minor >= 14) { ++ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ 
size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) +- se->conn.capable |= FUSE_CAP_IOCTL_DIR; +- +- /* Default settings for modern filesystems. +- * +- * Most of these capabilities were disabled by default in +- * libfuse2 for backwards compatibility reasons. In libfuse3, +- * we can finally enable them by default (as long as they're +- * supported by the kernel). 
+- */ +-#define LL_SET_DEFAULT(cond, cap) \ +- if ((cond) && (se->conn.capable & (cap))) \ +- se->conn.want |= (cap) +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); +- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); +- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); +- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); +- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); +- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); +- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); +- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, +- FUSE_CAP_POSIX_LOCKS); +- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); +- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); +- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, +- FUSE_CAP_READDIRPLUS_AUTO); +- se->conn.time_gran = 1; +- +- if (bufsize < FUSE_MIN_READ_BUFFER) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", +- bufsize); +- bufsize = FUSE_MIN_READ_BUFFER; +- } +- se->bufsize = bufsize; +- +- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) +- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; +- +- se->got_init = 1; +- if (se->op.init) +- se->op.init(se->userdata, &se->conn); +- +- if (se->conn.want & (~se->conn.capable)) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " +- "0x%x that are not supported by kernel, aborting.\n", +- se->conn.want & (~se->conn.capable)); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- +- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { +- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; +- } +- if (arg->flags & FUSE_MAX_PAGES) { +- outarg.flags |= FUSE_MAX_PAGES; +- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; +- } +- +- /* Always enable big writes, this is superseded +- by the max_write option */ +- outarg.flags |= FUSE_BIG_WRITES; +- +- if (se->conn.want & FUSE_CAP_ASYNC_READ) +- outarg.flags |= 
FUSE_ASYNC_READ; +- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) +- outarg.flags |= FUSE_POSIX_LOCKS; +- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) +- outarg.flags |= FUSE_ATOMIC_O_TRUNC; +- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) +- outarg.flags |= FUSE_EXPORT_SUPPORT; +- if (se->conn.want & FUSE_CAP_DONT_MASK) +- outarg.flags |= FUSE_DONT_MASK; +- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) +- outarg.flags |= FUSE_FLOCK_LOCKS; +- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) +- outarg.flags |= FUSE_AUTO_INVAL_DATA; +- if (se->conn.want & FUSE_CAP_READDIRPLUS) +- outarg.flags |= FUSE_DO_READDIRPLUS; +- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) +- outarg.flags |= FUSE_READDIRPLUS_AUTO; +- if (se->conn.want & FUSE_CAP_ASYNC_DIO) +- outarg.flags |= FUSE_ASYNC_DIO; +- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) +- outarg.flags |= FUSE_WRITEBACK_CACHE; +- if (se->conn.want & FUSE_CAP_POSIX_ACL) +- outarg.flags |= FUSE_POSIX_ACL; +- outarg.max_readahead = se->conn.max_readahead; +- outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) +- se->conn.max_background = (1 << 16) - 1; +- if (se->conn.congestion_threshold > se->conn.max_background) +- se->conn.congestion_threshold = se->conn.max_background; +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = +- se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; +- } +- if (se->conn.proto_minor >= 23) +- outarg.time_gran = se->conn.time_gran; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); 
+- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", +- outarg.time_gran); +- } +- if (arg->minor < 5) +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- else if (arg->minor < 23) +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- +- send_reply_ok(req, &outarg, outargsize); ++ } ++ if (se->conn.proto_minor >= 18) { ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ } ++ ++ /* ++ * Default settings for modern filesystems. ++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ } ++ ++ se->got_init = 1; ++ if (se->op.init) { ++ se->op.init(se->userdata, &se->conn); ++ } ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ 
fuse_log(FUSE_LOG_ERR, ++ "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* ++ * Always enable big writes, this is superseded ++ * by the max_write option ++ */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) { ++ outarg.flags |= FUSE_ASYNC_READ; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ } ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ } ++ if (se->conn.want & FUSE_CAP_DONT_MASK) { ++ outarg.flags |= FUSE_DONT_MASK; ++ } ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) { ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ } ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { ++ outarg.flags |= FUSE_ASYNC_DIO; ++ } ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) { ++ outarg.flags |= FUSE_POSIX_ACL; ++ } ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 
1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; ++ } ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; ++ } ++ ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) { ++ outarg.time_gran = se->conn.time_gran; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, ++ outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); ++ } ++ if (arg->minor < 5) { ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ } else if (arg->minor < 23) { ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ } ++ ++ send_reply_ok(req, &outarg, outargsize); + } + + static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_session *se = req->se; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- se->got_destroy = 1; +- if (se->op.destroy) +- se->op.destroy(se->userdata); ++ se->got_destroy = 1; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } + +- send_reply_ok(req, NULL, 0); ++ send_reply_ok(req, NULL, 0); + } + + static void list_del_nreq(struct fuse_notify_req *nreq) + { +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = 
next; ++ next->prev = prev; + } + + static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) ++ struct fuse_notify_req *next) + { +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; + } + + static void list_init_nreq(struct fuse_notify_req *nreq) + { +- nreq->next = nreq; +- nreq->prev = nreq; ++ nreq->next = nreq; ++ nreq->prev = nreq; + } + + static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) ++ const void *inarg, const struct fuse_buf *buf) + { +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req *head; + +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); + +- if (nreq != head) +- nreq->reply(nreq, req, nodeid, inarg, buf); ++ if (nreq != head) { ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++ } + } + + static int send_notify_iov(struct fuse_session *se, int notify_code, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (!se->got_init) +- return -ENOTCONN; ++ if (!se->got_init) { ++ return -ENOTCONN; ++ } + +- out.unique = 0; +- out.error = notify_code; +- iov[0].iov_base = &out; +- iov[0].iov_len 
= sizeof(struct fuse_out_header); ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(se, NULL, iov, count); ++ return fuse_send_msg(se, NULL, iov, count); + } + + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { +- if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; +- struct iovec iov[2]; ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; + +- outarg.kh = ph->kh; ++ outarg.kh = ph->kh; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); +- } else { +- return 0; +- } ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } + } + + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len) ++ off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; +- struct iovec iov[2]; ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; +- +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); + } + + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen) ++ const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out 
outarg; +- struct iovec iov[3]; ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); + } + +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen) ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen) + { +- struct fuse_notify_delete_out outarg; +- struct iovec iov[3]; ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = 
(void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; +- struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); +- int res; ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; + +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(out); +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); +- if (res > 0) +- res = -res; ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) { ++ res = -res; ++ } + +- return res; ++ return res; + } + + struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; ++ struct fuse_notify_req nreq; ++ void *cookie; + }; + +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, +- fuse_req_t req, fuse_ino_t ino, +- const void *inarg, +- const struct 
fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, +- arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, ++ fuse_ino_t ino, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } + out: +- free(rreq); ++ free(rreq); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) ++ size_t size, off_t offset, void *cookie) + { +- struct fuse_notify_retrieve_out outarg; 
+- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) +- return -ENOMEM; ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) { ++ return -ENOMEM; ++ } + +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); + +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } + +- return err; ++ return err; + } + + void *fuse_req_userdata(fuse_req_t req) + { +- return req->se->userdata; ++ return 
req->se->userdata; + } + + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) + { +- return &req->ctx; ++ return &req->ctx; + } + + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data) ++ void *data) + { +- pthread_mutex_lock(&req->lock); +- pthread_mutex_lock(&req->se->lock); +- req->u.ni.func = func; +- req->u.ni.data = data; +- pthread_mutex_unlock(&req->se->lock); +- if (req->interrupted && func) +- func(req, data); +- pthread_mutex_unlock(&req->lock); ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) { ++ func(req, data); ++ } ++ pthread_mutex_unlock(&req->lock); + } + + int fuse_req_interrupted(fuse_req_t req) + { +- int interrupted; ++ int interrupted; + +- pthread_mutex_lock(&req->se->lock); +- interrupted = req->interrupted; +- pthread_mutex_unlock(&req->se->lock); ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); + +- return interrupted; ++ return interrupted; + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); +- const char *name; ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; + } fuse_ll_ops[] = { +- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +- [FUSE_FORGET] = { do_forget, "FORGET" }, +- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, +- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, +- [FUSE_READLINK] = { do_readlink, "READLINK" }, +- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, +- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, +- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, +- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, +- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, +- [FUSE_RENAME] = { do_rename, "RENAME" }, +- [FUSE_LINK] = { do_link, "LINK" }, +- [FUSE_OPEN] = { do_open, "OPEN" }, +- [FUSE_READ] = { do_read, "READ" }, +- [FUSE_WRITE] = { do_write, "WRITE" }, +- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, +- [FUSE_RELEASE] = { do_release, "RELEASE" }, +- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, +- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, +- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, +- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, +- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, +- [FUSE_FLUSH] = { do_flush, "FLUSH" }, +- [FUSE_INIT] = { do_init, "INIT" }, +- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, +- [FUSE_READDIR] = { do_readdir, "READDIR" }, +- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, +- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, +- [FUSE_GETLK] = { do_getlk, "GETLK" }, +- [FUSE_SETLK] = { do_setlk, "SETLK" }, +- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, +- [FUSE_ACCESS] = { do_access, "ACCESS" }, +- [FUSE_CREATE] = { do_create, "CREATE" }, +- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, +- [FUSE_BMAP] = { do_bmap, "BMAP" }, +- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, +- [FUSE_POLL] = { do_poll, "POLL" }, +- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, +- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, +- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, +- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, +- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, +- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ 
[FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) + + static const char *opname(enum fuse_opcode opcode) + { +- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) +- return "???"; +- else +- return fuse_ll_ops[opcode].name; ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { ++ return "???"; ++ } else { ++ return fuse_ll_ops[opcode].name; ++ } + } + + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf) ++ 
const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ fuse_session_process_buf_int(se, buf, NULL); + } + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch) +-{ +- struct fuse_in_header *in; +- const void *inarg; +- struct fuse_req *req; +- int err; +- +- in = buf->mem; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", +- (unsigned long long) in->unique, +- opname((enum fuse_opcode) in->opcode), in->opcode, +- (unsigned long long) in->nodeid, buf->size, in->pid); +- } +- +- req = fuse_ll_alloc_req(se); +- if (req == NULL) { +- struct fuse_out_header out = { +- .unique = in->unique, +- .error = -ENOMEM, +- }; +- struct iovec iov = { +- .iov_base = &out, +- .iov_len = sizeof(struct fuse_out_header), +- }; +- +- fuse_send_msg(se, ch, &iov, 1); +- return; +- } +- +- req->unique = in->unique; +- req->ctx.uid = in->uid; +- req->ctx.gid = in->gid; +- req->ctx.pid = in->pid; +- req->ch = ch; +- +- err = EIO; +- if (!se->got_init) { +- enum fuse_opcode expected; +- +- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; +- if (in->opcode != expected) +- goto reply_err; +- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) +- goto reply_err; +- +- err = EACCES; +- /* Implement -o allow_root */ +- if (se->deny_others && in->uid != se->owner && in->uid != 0 && +- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && +- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && +- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && +- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && +- in->opcode != FUSE_NOTIFY_REPLY && +- in->opcode != FUSE_READDIRPLUS) +- goto reply_err; +- +- err = ENOSYS; +- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) +- goto reply_err; +- if (in->opcode != FUSE_INTERRUPT) { +- struct fuse_req *intr; +- pthread_mutex_lock(&se->lock); +- intr = check_interrupt(se, req); +- list_add_req(req, &se->list); +- pthread_mutex_unlock(&se->lock); +- if (intr) +- fuse_reply_err(intr, EAGAIN); +- } +- +- inarg = (void *) &in[1]; +- if (in->opcode == FUSE_WRITE && se->op.write_buf) +- do_write_buf(req, in->nodeid, inarg, buf); +- else if (in->opcode == FUSE_NOTIFY_REPLY) +- do_notify_reply(req, in->nodeid, inarg, buf); +- else +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); +- +- return; ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ int err; ++ ++ in = buf->mem; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " ++ "pid: %u\n", ++ (unsigned long long)in->unique, ++ opname((enum fuse_opcode)in->opcode), in->opcode, ++ (unsigned long long)in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, 
&iov, 1); ++ return; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) { ++ goto reply_err; ++ } ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { ++ goto reply_err; ++ } ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { ++ goto reply_err; ++ } ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { ++ goto reply_err; ++ } ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) { ++ fuse_reply_err(intr, EAGAIN); ++ } ++ } ++ ++ inarg = (void *)&in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) { ++ do_write_buf(req, in->nodeid, inarg, buf); ++ } else if (in->opcode == FUSE_NOTIFY_REPLY) { ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ } else { ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ } ++ ++ return; + + reply_err: +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + +-#define LL_OPTION(n,o,v) \ +- { n, offsetof(struct fuse_session, o), v } ++#define LL_OPTION(n, o, v) \ ++ { \ ++ n, offsetof(struct fuse_session, o), v \ ++ } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), +- LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), +- 
LL_OPTION("allow_root", deny_others, 1), +- FUSE_OPT_END ++ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END + }; + + void fuse_lowlevel_version(void) + { +- printf("using FUSE kernel interface version %i.%i\n", +- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, ++ FUSE_KERNEL_MINOR_VERSION); + } + + void fuse_lowlevel_help(void) + { +- /* These are not all options, but the ones that are +- potentially of interest to an end-user */ +- printf( +-" -o allow_root allow access by root\n" +-); ++ /* ++ * These are not all options, but the ones that are ++ * potentially of interest to an end-user ++ */ ++ printf(" -o allow_root allow access by root\n"); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- if (se->got_init && !se->got_destroy) { +- if (se->op.destroy) +- se->op.destroy(se->userdata); +- } +- pthread_mutex_destroy(&se->lock); +- free(se->cuse_data); +- if (se->fd != -1) +- close(se->fd); +- free(se); ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) { ++ close(se->fd); ++ } ++ free(se); + } + + + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata) +-{ +- struct fuse_session *se; +- +- if (sizeof(struct fuse_lowlevel_ops) < op_size) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +- op_size = sizeof(struct fuse_lowlevel_ops); +- } +- +- if (args->argc == 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); +- return NULL; +- } +- +- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); +- if (se == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); +- goto out1; +- } +- se->fd = -1; +- se->conn.max_write = UINT_MAX; +- se->conn.max_readahead = UINT_MAX; +- +- /* Parse options */ +- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) +- goto out2; +- if(args->argc == 1 && +- args->argv[0][0] == '-') { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +- "will be ignored\n"); +- } else if (args->argc != 1) { +- int i; +- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); +- for(i = 1; i < args->argc-1; i++) +- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); +- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); +- goto out4; +- } +- +- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- +- list_init_req(&se->list); +- list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; +- fuse_mutex_init(&se->lock); +- +- memcpy(&se->op, op, op_size); +- se->owner = getuid(); +- se->userdata = userdata; +- +- return se; ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ struct fuse_session *se; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log( ++ FUSE_LOG_ERR, ++ "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { ++ goto out2; ++ } ++ if (args->argc == 1 && args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if 
(args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for (i = 1; i < args->argc - 1; i++) { ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ } ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ return se; + + out4: +- fuse_opt_free_args(args); ++ fuse_opt_free_args(args); + out2: +- free(se); ++ free(se); + out1: +- return NULL; ++ return NULL; + } + + int fuse_session_mount(struct fuse_session *se, const char *mountpoint) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. +- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) +- close(fd); +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) +- return -1; +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) +- goto error_out; +- +- return 0; ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. 
++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) { ++ close(fd); ++ } ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) { ++ return -1; ++ } ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) { ++ goto error_out; ++ } ++ ++ return 0; + + error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; + } + + int fuse_session_fd(struct fuse_session *se) + { +- return se->fd; ++ return se->fd; + } + + void fuse_session_unmount(struct fuse_session *se) +@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; + +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); + + retry: +- buf = malloc(bufsize); +- if (buf == NULL) +- return -ENOMEM; +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) +- goto out_free; +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if 
((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) +- goto out_free; +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) +- break; +- +- s = end; +- if (ret < size) +- list[ret] = val; +- ret++; +- } ++ buf = malloc(bufsize); ++ if (buf == NULL) { ++ return -ENOMEM; ++ } ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ goto out_free; ++ } ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) { ++ goto out_free; ++ } ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) { ++ break; ++ } ++ ++ s = end; ++ if (ret < size) { ++ list[ret] = val; ++ } ++ ret++; ++ } + + out_free: +- free(buf); +- return ret; ++ free(buf); ++ return ret; + } + #else /* linux */ + /* +@@ -2446,23 +2586,25 @@ out_free: + */ + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- (void) req; (void) size; (void) list; +- return -ENOSYS; ++ (void)req; ++ (void)size; ++ (void)list; ++ return -ENOSYS; + } + #endif + + void fuse_session_exit(struct fuse_session *se) + { +- se->exited = 1; ++ se->exited = 1; + } + + void fuse_session_reset(struct fuse_session *se) + { +- se->exited = 0; +- se->error = 0; ++ se->exited = 0; ++ se->error = 0; + } + + int fuse_session_exited(struct fuse_session *se) + { +- return se->exited; ++ return se->exited; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 6b1adfc..adb9054 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1,15 +1,16 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- 
This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOWLEVEL_H_ + #define FUSE_LOWLEVEL_H_ + +-/** @file ++/** ++ * @file + * + * Low level API + * +@@ -24,16 +25,16 @@ + + #include "fuse_common.h" + +-#include + #include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Miscellaneous definitions * +- * ----------------------------------------------------------- */ ++/* ++ * Miscellaneous definitions ++ */ + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -53,47 +54,54 @@ struct fuse_session; + + /** Directory entry parameters supplied to fuse_reply_entry() */ + struct fuse_entry_param { +- /** Unique inode number +- * +- * In lookup, zero means negative entry (from version 2.5) +- * Returning ENOENT also means negative entry, but by setting zero +- * ino the kernel may cache negative entries for entry_timeout +- * seconds. +- */ +- fuse_ino_t ino; +- +- /** Generation number for this entry. +- * +- * If the file system will be exported over NFS, the +- * ino/generation pairs need to be unique over the file +- * system's lifetime (rather than just the mount time). So if +- * the file system reuses an inode after it has been deleted, +- * it must assign a new, previously unused generation number +- * to the inode at the same time. +- * +- */ +- uint64_t generation; +- +- /** Inode attributes. +- * +- * Even if attr_timeout == 0, attr must be correct. For example, +- * for open(), FUSE uses attr.st_size from lookup() to determine +- * how many bytes to request. If this value is not correct, +- * incorrect data will be returned. 
+- */ +- struct stat attr; +- +- /** Validity timeout (in seconds) for inode attributes. If +- attributes only change as a result of requests that come +- through the kernel, this should be set to a very large +- value. */ +- double attr_timeout; +- +- /** Validity timeout (in seconds) for the name. If directory +- entries are changed/deleted only as a result of requests +- that come through the kernel, this should be set to a very +- large value. */ +- double entry_timeout; ++ /** ++ * Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** ++ * Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** ++ * Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** ++ * Validity timeout (in seconds) for inode attributes. If ++ * attributes only change as a result of requests that come ++ * through the kernel, this should be set to a very large ++ * value. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Validity timeout (in seconds) for the name. If directory ++ * entries are changed/deleted only as a result of requests ++ * that come through the kernel, this should be set to a very ++ * large value. 
++ */ ++ double entry_timeout; + }; + + /** +@@ -105,38 +113,38 @@ struct fuse_entry_param { + * there is no valid uid/pid/gid that could be reported. + */ + struct fuse_ctx { +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Thread ID of the calling process */ +- pid_t pid; ++ /** Thread ID of the calling process */ ++ pid_t pid; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + struct fuse_forget_data { +- fuse_ino_t ino; +- uint64_t nlookup; ++ fuse_ino_t ino; ++ uint64_t nlookup; + }; + + /* 'to_set' flags in setattr */ +-#define FUSE_SET_ATTR_MODE (1 << 0) +-#define FUSE_SET_ATTR_UID (1 << 1) +-#define FUSE_SET_ATTR_GID (1 << 2) +-#define FUSE_SET_ATTR_SIZE (1 << 3) +-#define FUSE_SET_ATTR_ATIME (1 << 4) +-#define FUSE_SET_ATTR_MTIME (1 << 5) +-#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) +-#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) +-#define FUSE_SET_ATTR_CTIME (1 << 10) +- +-/* ----------------------------------------------------------- * +- * Request methods and replies * +- * ----------------------------------------------------------- */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ++ * Request methods and replies ++ */ + + /** + * Low level filesystem operations +@@ -166,1075 +174,1069 @@ struct fuse_forget_data { + * this file will not be called. 
+ */ + struct fuse_lowlevel_ops { +- /** +- * Initialize filesystem +- * +- * This function is called when libfuse establishes +- * communication with the FUSE kernel module. The file system +- * should use this module to inspect and/or modify the +- * connection parameters provided in the `conn` structure. +- * +- * Note that some parameters may be overwritten by options +- * passed to fuse_session_new() which take precedence over the +- * values set in this handler. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*init) (void *userdata, struct fuse_conn_info *conn); +- +- /** +- * Clean up filesystem. +- * +- * Called on filesystem exit. When this method is called, the +- * connection to the kernel may be gone already, so that eg. calls +- * to fuse_lowlevel_notify_* will fail. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*destroy) (void *userdata); +- +- /** +- * Look up a directory entry by name and get its attributes. +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name the name to look up +- */ +- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Forget about an inode +- * +- * This function is called when the kernel removes an inode +- * from its internal caches. +- * +- * The inode's lookup count increases by one for every call to +- * fuse_reply_entry and fuse_reply_create. The nlookup parameter +- * indicates by how much the lookup count should be decreased. +- * +- * Inodes with a non-zero lookup count may receive request from +- * the kernel even after calls to unlink, rmdir or (when +- * overwriting an existing file) rename. 
Filesystems must handle +- * such requests properly and it is recommended to defer removal +- * of the inode until the lookup count reaches zero. Calls to +- * unlink, rmdir or rename will be followed closely by forget +- * unless the file or directory is open, in which case the +- * kernel issues forget only after the release or releasedir +- * calls. +- * +- * Note that if a file system will be exported over NFS the +- * inodes lifetime must extend even beyond forget. See the +- * generation field in struct fuse_entry_param above. +- * +- * On unmount the lookup count for all inodes implicitly drops +- * to zero. It is not guaranteed that the file system will +- * receive corresponding forget messages for the affected +- * inodes. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param ino the inode number +- * @param nlookup the number of lookups to forget +- */ +- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); +- +- /** +- * Get file attributes. +- * +- * If writeback caching is enabled, the kernel may have a +- * better idea of a file's length than the FUSE file system +- * (eg if there has been a write that extended the file size, +- * but that has not yet been passed to the filesystem.n +- * +- * In this case, the st_size value provided by the file system +- * will be ignored. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi for future use, currently always NULL +- */ +- void (*getattr) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Set file attributes +- * +- * In the 'attr' argument only members indicated by the 'to_set' +- * bitmask contain valid values. Other members contain undefined +- * values. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits if the file +- * size or owner is being changed. 
+- * +- * If the setattr was invoked from the ftruncate() system call +- * under Linux kernel versions 2.6.15 or later, the fi->fh will +- * contain the value set by the open method or will be undefined +- * if the open method didn't set any value. Otherwise (not +- * ftruncate call, or kernel version earlier than 2.6.15) the fi +- * parameter will be NULL. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param attr the attributes +- * @param to_set bit mask of attributes which should be set +- * @param fi file information, or NULL +- */ +- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int to_set, struct fuse_file_info *fi); +- +- /** +- * Read symbolic link +- * +- * Valid replies: +- * fuse_reply_readlink +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- */ +- void (*readlink) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Create file node +- * +- * Create a regular file, character device, block device, fifo or +- * socket node. 
+- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param rdev the device number (only valid if created file is a device) +- */ +- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, dev_t rdev); +- +- /** +- * Create a directory +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode with which to create the new file +- */ +- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode); +- +- /** +- * Remove a file +- * +- * If the file's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Remove a directory +- * +- * If the directory's inode's lookup count is non-zero, the +- * file system is expected to postpone any removal of the +- * inode until the lookup count reaches zero (see description +- * of the forget function). 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Create a symbolic link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param link the contents of the symbolic link +- * @param parent inode number of the parent directory +- * @param name to create +- */ +- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, +- const char *name); +- +- /** Rename a file +- * +- * If the target exists it should be atomically replaced. If +- * the target's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EINVAL, i.e. all +- * future bmap requests will fail with EINVAL without being +- * send to the filesystem process. +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the old parent directory +- * @param name old name +- * @param newparent inode number of the new parent directory +- * @param newname new name +- */ +- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags); +- +- /** +- * Create a hard link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the old inode number +- * @param newparent inode number of the new parent directory +- * @param newname new name to create +- */ +- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, +- const char *newname); +- +- /** +- * Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used +- * by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount +- * option is given, this check is already done by the +- * kernel before calling open() and may thus be omitted by +- * the filesystem. +- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. 
The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open and release will also succeed without being +- * sent to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*open) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read data +- * +- * Read should send exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the file +- * has been opened in 'direct_io' mode, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_iov +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size number of bytes to read +- * @param off offset to read from +- * @param fi file information +- */ +- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Write data +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the file has +- * been opened in 'direct_io' mode, in which case the return value +- * of the write system call will reflect the return value of this +- * operation. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param buf data to write +- * @param size number of bytes to write +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); +- +- /** +- * Flush method +- * +- * This is called on each close() of the opened file. +- * +- * Since file descriptors can be duplicated (dup, dup2, fork), for +- * one open call there may be many flush calls. +- * +- * Filesystems shouldn't assume that flush will always be called +- * after some writes, or that if will be called at all. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * NOTE: the name of the method is misleading, since (unlike +- * fsync) the filesystem is not forced to flush pending writes. 
+- * One reason to flush data is if the filesystem wants to return +- * write errors during close. However, such use is non-portable +- * because POSIX does not require [close] to wait for delayed I/O to +- * complete. +- * +- * If the filesystem supports file locking operations (setlk, +- * getlk) it should remove all locks belonging to 'fi->owner'. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to flush() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- void (*flush) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open call there will be exactly one release call (unless +- * the filesystem is force-unmounted). +- * +- * The filesystem may reply with an error, but error values are +- * not returned to close() or munmap() which triggered the +- * release. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * fi->flags will contain the same flags as for open. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*release) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. 
+- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsync() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Open a directory +- * +- * Filesystem may store an arbitrary file handle (pointer, index, +- * etc) in fi->fh, and use this in other all other directory +- * stream operations (readdir, releasedir, fsyncdir). +- * +- * If this request is answered with an error code of ENOSYS and +- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, +- * this is treated as success and future calls to opendir and +- * releasedir will also succeed without being sent to the filesystem +- * process. In addition, the kernel will cache readdir results +- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*opendir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read directory +- * +- * Send a buffer filled using fuse_add_direntry(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * Returning a directory entry from readdir() does not affect +- * its lookup count. +- * +- * If off_t is non-zero, then it will correspond to one of the off_t +- * values that was previously returned by readdir() for the same +- * directory handle. 
In this case, readdir() should skip over entries +- * coming before the position defined by the off_t value. If entries +- * are added or removed while the directory handle is open, they filesystem +- * may still include the entries that have been removed, and may not +- * report the entries that have been created. However, addition or +- * removal of entries must never cause readdir() to skip over unrelated +- * entries or to report them more than once. This means +- * that off_t can not be a simple index that enumerates the entries +- * that have been returned but must contain sufficient information to +- * uniquely determine the next directory entry to return even when the +- * set of entries is changing. +- * +- * The function does not have to report the '.' and '..' +- * entries, but is allowed to do so. Note that, if readdir does +- * not return '.' or '..', they will not be implicitly returned, +- * and this behavior is observable by the caller. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Release an open directory +- * +- * For every opendir call there will be exactly one releasedir +- * call (unless the filesystem is force-unmounted). +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the directory +- * contents should be flushed, not the meta data. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsyncdir() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Get file system statistics +- * +- * Valid replies: +- * fuse_reply_statfs +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number, zero means "undefined" +- */ +- void (*statfs) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Set an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future setxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- */ +- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags); +- +- /** +- * Get an extended attribute +- * +- * If size is zero, the size of the value should be sent with +- * fuse_reply_xattr. 
+- * +- * If the size is non-zero, and the value fits in the buffer, the +- * value should be sent with fuse_reply_buf. +- * +- * If the size is too small for the value, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future getxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- * @param size maximum size of the value to send +- */ +- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size); +- +- /** +- * List extended attribute names +- * +- * If size is zero, the total size of the attribute list should be +- * sent with fuse_reply_xattr. +- * +- * If the size is non-zero, and the null character separated +- * attribute list fits in the buffer, the list should be sent with +- * fuse_reply_buf. +- * +- * If the size is too small for the list, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future listxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum size of the list to send +- */ +- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); +- +- /** +- * Remove an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all +- * future removexattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- */ +- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() and chdir() system +- * calls. If the 'default_permissions' mount option is given, +- * this method is not called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent success, i.e. this and all future access() +- * requests will succeed without being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param mask requested access mode +- */ +- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * See the description of the open handler for more +- * information. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. +- * +- * If this request is answered with an error code of ENOSYS, the handler +- * is treated as not implemented (i.e., for this and future requests the +- * mknod() and open() handlers will be called instead). 
+- * +- * Valid replies: +- * fuse_reply_create +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param fi file information +- */ +- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi); +- +- /** +- * Test for a POSIX file lock +- * +- * Valid replies: +- * fuse_reply_lock +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to test +- */ +- void (*getlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, struct flock *lock); +- +- /** +- * Acquire, modify or release a POSIX file lock +- * +- * For POSIX threads (NPTL) there's a 1-1 relation between pid and +- * owner, but otherwise this is not always the case. For checking +- * lock ownership, 'fi->owner' must be used. The l_pid field in +- * 'struct flock' should only be used to fill in this field in +- * getlk(). +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to set +- * @param sleep locking operation may sleep +- */ +- void (*setlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, +- struct flock *lock, int sleep); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. 
all future bmap() requests will +- * fail with the same error code without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_bmap +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param blocksize unit of block index +- * @param idx block index within file +- */ +- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, +- uint64_t idx); +- +- /** +- * Ioctl +- * +- * Note: For unrestricted ioctls (not allowed for FUSE +- * servers), data in and out areas can be discovered by giving +- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For +- * restricted ioctls, kernel prepares in/out data area +- * according to the information encoded in cmd. +- * +- * Valid replies: +- * fuse_reply_ioctl_retry +- * fuse_reply_ioctl +- * fuse_reply_ioctl_iov +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param cmd ioctl command +- * @param arg ioctl argument +- * @param fi file information +- * @param flags for FUSE_IOCTL_* flags +- * @param in_buf data fetched from the caller +- * @param in_bufsz number of fetched bytes +- * @param out_bufsz maximum size of output data +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned flags, +- const void *in_buf, size_t in_bufsz, size_t out_bufsz); +- +- /** +- * Poll for IO readiness +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_lowlevel_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. 
+- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as success (with a kernel-defined default poll-mask) and +- * future calls to pull() will succeed the same way without being send +- * to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_poll +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param ph poll handle to be used for notification +- */ +- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- struct fuse_pollhandle *ph); +- +- /** +- * Write data made available in a buffer +- * +- * This is a more generic version of the ->write() method. If +- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the +- * kernel supports splicing from the fuse device, then the +- * data will be made available in pipe for supporting zero +- * copy data transfer. +- * +- * buf->count is guaranteed to be one (and thus buf->idx is +- * always zero). The write_buf handler must ensure that +- * bufv->off is correctly updated (reflecting the number of +- * bytes read from bufv->buf[0]). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. 
+- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param bufv buffer containing the data +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *bufv, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** +- * Forget about multiple inodes +- * +- * See description of the forget function for more +- * information. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- */ +- void (*forget_multi) (fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets); +- +- /** +- * Acquire, modify or release a BSD file lock +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param op the locking operation, see flock(2) +- */ +- void (*flock) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, int op); +- +- /** +- * Allocate requested space. If this function returns success then +- * subsequent writes to the specified range shall not fail due to the lack +- * of free space on the file system storage media. 
+- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future fallocate() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param offset starting point for allocated region +- * @param length size of allocated region +- * @param mode determines the operation to be performed on the given range, +- * see fallocate(2) +- */ +- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); +- +- /** +- * Read directory with attributes +- * +- * Send a buffer filled using fuse_add_direntry_plus(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * In contrast to readdir() (which does not affect the lookup counts), +- * the lookup count of every entry returned by readdirplus(), except "." +- * and "..", is incremented by one. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. 
+- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future copy_file_range() requests will fail with EOPNOTSUPP without +- * being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino_in the inode number or the source file +- * @param off_in starting point from were the data should be read +- * @param fi_in file information of the source file +- * @param ino_out the inode number or the destination file +- * @param off_out starting point where the data should be written +- * @param fi_out file information of the destination file +- * @param len maximum size of the data to copy +- * @param flags passed along with the copy_file_range() syscall +- */ +- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, +- off_t off_in, struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags); +- +- /** +- * Find next data or hole after the specified offset +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. all future lseek() requests will +- * fail with the same error code without being send to the filesystem +- * process. 
+- * +- * Valid replies: +- * fuse_reply_lseek +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param off offset to start search from +- * @param whence either SEEK_DATA or SEEK_HOLE +- * @param fi file information +- */ +- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi); ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init)(void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy)(void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive request from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inodes lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes. ++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem.n ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. 
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** ++ * Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future bmap requests will fail with EINVAL without being ++ * send to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in other all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. 
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that if will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes. 
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in other all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle. 
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, they filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, ++ struct fuse_file_info *fi, unsigned flags, const void *in_buf, ++ size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to pull() will succeed the same way without being send ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in pipe for supporting zero ++ * copy data transfer. ++ * ++ * buf->count is guaranteed to be one (and thus buf->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. 
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi)(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again.
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination, effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number of the source file ++ * @param off_in starting point from where the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number of the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process.
++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); + }; + + /** +@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *fi); ++ const struct fuse_file_info *fi); + + /** + * Reply with attributes +@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + * + * @param req request handle + * @param attr the attributes +- * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout); ++ double attr_timeout); + + /** + * Reply with the contents of a symbolic link +@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + + /** + * Reply with data vector +@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); + */ + int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + +-/* ----------------------------------------------------------- * +- * Filling a buffer in readdir * +- * ----------------------------------------------------------- */ ++/* ++ * Filling a buffer in readdir ++ */ + + /** + * Add a directory entry to 
the buffer +@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + * @return the space needed for the entry + */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, +- off_t off); ++ const char *name, const struct stat *stbuf, off_t off); + + /** + * Add a directory entry to the buffer with the attributes +@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + * @return the space needed for the entry + */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off); ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); + + /** + * Reply to ask for data fetch and output buffer preparation. ioctl +@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + * @param out_count number of entries in out_iov + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count); ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count); + + /** + * Reply to finish ioctl +@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); + * @param count the size of vector + */ + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count); ++ int count); + + /** + * Reply with poll result event mask +@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); + */ + int fuse_reply_lseek(fuse_req_t req, off_t off); + +-/* ----------------------------------------------------------- * +- * Notification * +- * ----------------------------------------------------------- */ ++/* ++ * Notification ++ */ + + /** + * Notify 
IO readiness event +@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len); ++ off_t off, off_t len); + + /** + * Notify to invalidate parent attributes and the dentry matching +@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen); ++ const char *name, size_t namelen); + + /** + * This function behaves like fuse_lowlevel_notify_inval_entry() with +@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen); ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen); + + /** + * Store data to the kernel buffers +@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); + /** + * Retrieve data from the kernel buffers + * +@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); ++ size_t size, off_t offset, void 
*cookie); + + +-/* ----------------------------------------------------------- * +- * Utility functions * +- * ----------------------------------------------------------- */ ++/* ++ * Utility functions ++ */ + + /** + * Get the userdata from the request +@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); + * @param data user data passed to the callback function + */ + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data); ++ void *data); + + /** + * Check if a request has already been interrupted +@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + int fuse_req_interrupted(fuse_req_t req); + + +-/* ----------------------------------------------------------- * +- * Inquiry functions * +- * ----------------------------------------------------------- */ ++/* ++ * Inquiry functions ++ */ + + /** + * Print low-level version information to stdout. +@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); + */ + void fuse_cmdline_help(void); + +-/* ----------------------------------------------------------- * +- * Filesystem setup & teardown * +- * ----------------------------------------------------------- */ ++/* ++ * Filesystem setup & teardown ++ */ + + struct fuse_cmdline_opts { +- int foreground; +- int debug; +- int nodefault_subtype; +- char *mountpoint; +- int show_version; +- int show_help; +- unsigned int max_idle_threads; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ unsigned int max_idle_threads; + }; + + /** +@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { + * @param opts output argument for parsed options + * @return 0 on success, -1 on failure + */ +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts); ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); + + /** + * Create a low level session. 
+@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, + * @return the fuse session on success, NULL on failure + **/ + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata); ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); + + /** + * Mount a FUSE file system. +@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); + */ + void fuse_session_destroy(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Custom event loop support * +- * ----------------------------------------------------------- */ ++/* ++ * Custom event loop support ++ */ + + /** + * Return file descriptor for communication with kernel. +@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); + * @param buf the fuse_buf containing the request + */ + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf); ++ const struct fuse_buf *buf); + + /** + * Read a raw request from the kernel into the supplied buffer. +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index 2f6663e..f252baa 100644 +--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -1,18 +1,18 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include + + /* +- Versioned symbols cannot be used in some cases because it +- - confuse the dynamic linker in uClibc +- - not supported on MacOSX (in MachO binary format) +-*/ ++ * Versioned symbols cannot be used in some cases because it ++ * - confuse the dynamic linker in uClibc ++ * - not supported on MacOSX (in MachO binary format) ++ */ + #if (!defined(__UCLIBC__) && !defined(__APPLE__)) + #define FUSE_SYMVER(x) __asm__(x) + #else +@@ -25,11 +25,11 @@ + /* Is this hack still needed? */ + static inline void fuse_mutex_init(pthread_mutex_t *mut) + { +- pthread_mutexattr_t attr; +- pthread_mutexattr_init(&attr); +- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +- pthread_mutex_init(mut, &attr); +- pthread_mutexattr_destroy(&attr); ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); + } + #endif + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index 93066b9..edd36f4 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -1,423 +1,450 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of option parsing routines (dealing with `struct +- fuse_args`). +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of option parsing routines (dealing with `struct ++ * fuse_args`). ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + ++#include "fuse_opt.h" + #include "config.h" + #include "fuse_i.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" + ++#include + #include + #include + #include +-#include + + struct fuse_opt_context { +- void *data; +- const struct fuse_opt *opt; +- fuse_opt_proc_t proc; +- int argctr; +- int argc; +- char **argv; +- struct fuse_args outargs; +- char *opts; +- int nonopt; ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; + }; + + void fuse_opt_free_args(struct fuse_args *args) + { +- if (args) { +- if (args->argv && args->allocated) { +- int i; +- for (i = 0; i < args->argc; i++) +- free(args->argv[i]); +- free(args->argv); +- } +- args->argc = 0; +- args->argv = NULL; +- args->allocated = 0; +- } ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) { ++ free(args->argv[i]); ++ } ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } + } + + static int alloc_failed(void) + { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; + } + + int fuse_opt_add_arg(struct fuse_args *args, const char *arg) + { +- char **newargv; +- char *newarg; +- +- assert(!args->argv || args->allocated); +- +- newarg = strdup(arg); +- if (!newarg) +- return alloc_failed(); +- +- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); +- if (!newargv) { +- free(newarg); +- return alloc_failed(); +- } +- +- args->argv = newargv; +- args->allocated = 1; +- args->argv[args->argc++] = newarg; +- args->argv[args->argc] = NULL; +- return 0; ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; + } + + static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, +- const char *arg) ++ const char *arg) + { +- assert(pos <= args->argc); +- if (fuse_opt_add_arg(args, arg) == -1) +- return -1; +- +- if (pos != args->argc - 1) { +- char *newarg = args->argv[args->argc - 1]; +- memmove(&args->argv[pos + 1], &args->argv[pos], +- sizeof(char *) * (args->argc - pos - 1)); +- args->argv[pos] = newarg; +- } +- return 0; ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) { ++ return -1; ++ } ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; + } + + int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) + { +- return fuse_opt_insert_arg_common(args, pos, arg); ++ return fuse_opt_insert_arg_common(args, pos, arg); + } + + static int next_arg(struct fuse_opt_context *ctx, const char *opt) + { +- if (ctx->argctr + 1 >= ctx->argc) { +- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); +- return -1; +- } +- ctx->argctr++; +- return 0; ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; + } + + static int add_arg(struct fuse_opt_context *ctx, const char *arg) + { +- return fuse_opt_add_arg(&ctx->outargs, arg); ++ return fuse_opt_add_arg(&ctx->outargs, arg); + } + + static int add_opt_common(char **opts, const char *opt, int esc) + { +- unsigned oldlen = *opts ? 
strlen(*opts) : 0; +- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); +- +- if (!d) +- return alloc_failed(); +- +- *opts = d; +- if (oldlen) { +- d += oldlen; +- *d++ = ','; +- } +- +- for (; *opt; opt++) { +- if (esc && (*opt == ',' || *opt == '\\')) +- *d++ = '\\'; +- *d++ = *opt; +- } +- *d = '\0'; +- +- return 0; ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) { ++ return alloc_failed(); ++ } ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) { ++ *d++ = '\\'; ++ } ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; + } + + int fuse_opt_add_opt(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 0); ++ return add_opt_common(opts, opt, 0); + } + + int fuse_opt_add_opt_escaped(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 1); ++ return add_opt_common(opts, opt, 1); + } + + static int add_opt(struct fuse_opt_context *ctx, const char *opt) + { +- return add_opt_common(&ctx->opts, opt, 1); ++ return add_opt_common(&ctx->opts, opt, 1); + } + + static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, +- int iso) ++ int iso) + { +- if (key == FUSE_OPT_KEY_DISCARD) +- return 0; +- +- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { +- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); +- if (res == -1 || !res) +- return res; +- } +- if (iso) +- return add_opt(ctx, arg); +- else +- return add_arg(ctx, arg); ++ if (key == FUSE_OPT_KEY_DISCARD) { ++ return 0; ++ } ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) { ++ return res; ++ } ++ } ++ if (iso) { ++ return add_opt(ctx, arg); ++ } else { ++ return add_arg(ctx, arg); ++ } + } + + static int match_template(const char *t, const char *arg, unsigned *sepp) + { +- int arglen = strlen(arg); +- 
const char *sep = strchr(t, '='); +- sep = sep ? sep : strchr(t, ' '); +- if (sep && (!sep[1] || sep[1] == '%')) { +- int tlen = sep - t; +- if (sep[0] == '=') +- tlen ++; +- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { +- *sepp = sep - t; +- return 1; +- } +- } +- if (strcmp(t, arg) == 0) { +- *sepp = 0; +- return 1; +- } +- return 0; ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') { ++ tlen++; ++ } ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; + } + + static const struct fuse_opt *find_opt(const struct fuse_opt *opt, +- const char *arg, unsigned *sepp) ++ const char *arg, unsigned *sepp) + { +- for (; opt && opt->templ; opt++) +- if (match_template(opt->templ, arg, sepp)) +- return opt; +- return NULL; ++ for (; opt && opt->templ; opt++) { ++ if (match_template(opt->templ, arg, sepp)) { ++ return opt; ++ } ++ } ++ return NULL; + } + + int fuse_opt_match(const struct fuse_opt *opts, const char *opt) + { +- unsigned dummy; +- return find_opt(opts, opt, &dummy) ? 1 : 0; ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 
1 : 0; + } + + static int process_opt_param(void *var, const char *format, const char *param, +- const char *arg) ++ const char *arg) + { +- assert(format[0] == '%'); +- if (format[1] == 's') { +- char **s = var; +- char *copy = strdup(param); +- if (!copy) +- return alloc_failed(); +- +- free(*s); +- *s = copy; +- } else { +- if (sscanf(param, format, var) != 1) { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); +- return -1; +- } +- } +- return 0; ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) { ++ return alloc_failed(); ++ } ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", ++ arg); ++ return -1; ++ } ++ } ++ return 0; + } + +-static int process_opt(struct fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, ++ unsigned sep, const char *arg, int iso) + { +- if (opt->offset == -1U) { +- if (call_proc(ctx, arg, opt->value, iso) == -1) +- return -1; +- } else { +- void *var = (char *)ctx->data + opt->offset; +- if (sep && opt->templ[sep + 1]) { +- const char *param = arg + sep; +- if (opt->templ[sep] == '=') +- param ++; +- if (process_opt_param(var, opt->templ + sep + 1, +- param, arg) == -1) +- return -1; +- } else +- *(int *)var = opt->value; +- } +- return 0; ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) { ++ return -1; ++ } ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') { ++ param++; ++ } ++ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == ++ -1) { ++ return -1; ++ } ++ } else { ++ *(int *)var = opt->value; ++ } ++ } ++ return 0; + } + + static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) + { +- int res; +- char *newarg; +- char *param; +- +- if (next_arg(ctx, arg) == -1) +- return -1; +- +- param = ctx->argv[ctx->argctr]; +- newarg = malloc(sep + strlen(param) + 1); +- if (!newarg) +- return alloc_failed(); +- +- memcpy(newarg, arg, sep); +- strcpy(newarg + sep, param); +- res = process_opt(ctx, opt, sep, newarg, iso); +- free(newarg); +- +- return res; ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 1); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; + } + + static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) + { +- unsigned sep; +- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); +- if (opt) { +- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { +- int res; +- if (sep && opt->templ[sep] == ' ' && !arg[sep]) +- res = process_opt_sep_arg(ctx, opt, sep, arg, +- iso); +- else +- res = process_opt(ctx, opt, sep, arg, iso); +- if (res == -1) +- return -1; +- } +- return 0; +- } else +- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { ++ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); ++ } else { ++ res = process_opt(ctx, opt, sep, arg, iso); ++ } ++ if (res == -1) { ++ return -1; ++ } ++ } ++ return 0; ++ } else { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ } + } + + static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) + { +- char *s = opts; +- char *d = s; +- int end = 0; +- +- while (!end) { +- if (*s == '\0') +- end = 1; +- if (*s == ',' || end) { +- int res; +- +- *d = '\0'; +- res = process_gopt(ctx, opts, 1); +- if (res == -1) +- return -1; +- d = opts; +- } else { +- if (s[0] == '\\' && s[1] != '\0') { +- s++; +- if (s[0] >= '0' && s[0] <= '3' && +- s[1] >= '0' && s[1] <= '7' && +- s[2] >= '0' && s[2] <= '7') { +- *d++ = (s[0] - '0') * 0100 + +- (s[1] - '0') * 0010 + +- (s[2] - '0'); +- s += 2; +- } else { +- *d++ = *s; +- } +- } else { +- *d++ = *s; +- } +- } +- s++; +- } +- +- return 0; ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') { ++ end = 1; ++ } ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) { ++ return -1; ++ } ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; + } + + static int process_option_group(struct fuse_opt_context *ctx, const char *opts) + { +- int res; +- char *copy = strdup(opts); +- +- if (!copy) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +- res = process_real_option_group(ctx, copy); +- free(copy); +- return res; ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; + } + + static int process_one(struct fuse_opt_context *ctx, const char *arg) + { +- if (ctx->nonopt || arg[0] != '-') +- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); +- else if (arg[1] == 'o') { +- if (arg[2]) +- return process_option_group(ctx, arg + 2); +- else { +- if 
(next_arg(ctx, arg) == -1) +- return -1; +- +- return process_option_group(ctx, +- ctx->argv[ctx->argctr]); +- } +- } else if (arg[1] == '-' && !arg[2]) { +- if (add_arg(ctx, arg) == -1) +- return -1; +- ctx->nonopt = ctx->outargs.argc; +- return 0; +- } else +- return process_gopt(ctx, arg, 0); ++ if (ctx->nonopt || arg[0] != '-') { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ } else if (arg[1] == 'o') { ++ if (arg[2]) { ++ return process_option_group(ctx, arg + 2); ++ } else { ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ return process_option_group(ctx, ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else { ++ return process_gopt(ctx, arg, 0); ++ } + } + + static int opt_parse(struct fuse_opt_context *ctx) + { +- if (ctx->argc) { +- if (add_arg(ctx, ctx->argv[0]) == -1) +- return -1; +- } +- +- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) +- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) +- return -1; +- +- if (ctx->opts) { +- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || +- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) +- return -1; +- } +- +- /* If option separator ("--") is the last argument, remove it */ +- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && +- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { +- free(ctx->outargs.argv[ctx->outargs.argc - 1]); +- ctx->outargs.argv[--ctx->outargs.argc] = NULL; +- } +- +- return 0; ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) { ++ return -1; ++ } ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { ++ return -1; ++ } ++ } ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { ++ return -1; ++ } ++ } ++ ++ /* If 
option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; + } + + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc) ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) + { +- int res; +- struct fuse_opt_context ctx = { +- .data = data, +- .opt = opts, +- .proc = proc, +- }; +- +- if (!args || !args->argv || !args->argc) +- return 0; +- +- ctx.argc = args->argc; +- ctx.argv = args->argv; +- +- res = opt_parse(&ctx); +- if (res != -1) { +- struct fuse_args tmp = *args; +- *args = ctx.outargs; +- ctx.outargs = tmp; +- } +- free(ctx.opts); +- fuse_opt_free_args(&ctx.outargs); +- return res; ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) { ++ return 0; ++ } ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; + } +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index 6910255..8f59b4d 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_OPT_H_ + #define FUSE_OPT_H_ +@@ -37,7 +37,7 @@ + * + * - 'offsetof(struct foo, member)' actions i) and iii) + * +- * - -1 action ii) ++ * - -1 action ii) + * + * The 'offsetof()' macro is defined in the header. + * +@@ -48,7 +48,7 @@ + * + * The types of templates are: + * +- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only + * themselves. Invalid values are "--" and anything beginning + * with "-o" + * +@@ -71,58 +71,67 @@ + * freed. + */ + struct fuse_opt { +- /** Matching template and optional parameter formatting */ +- const char *templ; ++ /** Matching template and optional parameter formatting */ ++ const char *templ; + +- /** +- * Offset of variable within 'data' parameter of fuse_opt_parse() +- * or -1 +- */ +- unsigned long offset; ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; + +- /** +- * Value to set the variable to, or to be passed as 'key' to the +- * processing function. Ignored if template has a format +- */ +- int value; ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; + }; + + /** +- * Key option. In case of a match, the processing function will be ++ * Key option. In case of a match, the processing function will be + * called with the specified key. + */ +-#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++#define FUSE_OPT_KEY(templ, key) \ ++ { \ ++ templ, -1U, key \ ++ } + + /** +- * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * Last option. 
An array of 'struct fuse_opt' must end with a NULL + * template value + */ +-#define FUSE_OPT_END { NULL, 0, 0 } ++#define FUSE_OPT_END \ ++ { \ ++ NULL, 0, 0 \ ++ } + + /** + * Argument list + */ + struct fuse_args { +- /** Argument count */ +- int argc; ++ /** Argument count */ ++ int argc; + +- /** Argument vector. NULL terminated */ +- char **argv; ++ /** Argument vector. NULL terminated */ ++ char **argv; + +- /** Is 'argv' allocated? */ +- int allocated; ++ /** Is 'argv' allocated? */ ++ int allocated; + }; + + /** + * Initializer for 'struct fuse_args' + */ +-#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++#define FUSE_ARGS_INIT(argc, argv) \ ++ { \ ++ argc, argv, 0 \ ++ } + + /** + * Key value passed to the processing function if an option did not + * match any template + */ +-#define FUSE_OPT_KEY_OPT -1 ++#define FUSE_OPT_KEY_OPT -1 + + /** + * Key value passed to the processing function for all non-options +@@ -130,7 +139,7 @@ struct fuse_args { + * Non-options are the arguments beginning with a character other than + * '-' or all arguments after the special '--' option + */ +-#define FUSE_OPT_KEY_NONOPT -2 ++#define FUSE_OPT_KEY_NONOPT -2 + + /** + * Special key value for options to keep +@@ -174,7 +183,7 @@ struct fuse_args { + * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept + */ + typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, +- struct fuse_args *outargs); ++ struct fuse_args *outargs); + + /** + * Option parsing function +@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, + * @return -1 on error, 0 on success + */ + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); + + /** + * Add an option to a comma separated option list +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 4271947..19d6791 100644 +--- 
a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -1,91 +1,95 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Utility functions for setting signal handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Utility functions for setting signal handlers. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "config.h" +-#include "fuse_lowlevel.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + +-#include +-#include + #include ++#include + #include ++#include + + static struct fuse_session *fuse_instance; + + static void exit_handler(int sig) + { +- if (fuse_instance) { +- fuse_session_exit(fuse_instance); +- if(sig <= 0) { +- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); +- abort(); +- } +- fuse_instance->error = sig; +- } ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if (sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } + } + + static void do_nothing(int sig) + { +- (void) sig; ++ (void)sig; + } + + static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + { +- struct sigaction sa; +- struct sigaction old_sa; ++ struct sigaction sa; ++ struct sigaction old_sa; + +- memset(&sa, 0, sizeof(struct sigaction)); +- sa.sa_handler = remove ? SIG_DFL : handler; +- sigemptyset(&(sa.sa_mask)); +- sa.sa_flags = 0; ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? 
SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; + +- if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); +- return -1; +- } ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } + +- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && +- sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); +- return -1; +- } +- return 0; ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; + } + + int fuse_set_signal_handlers(struct fuse_session *se) + { +- /* If we used SIG_IGN instead of the do_nothing function, +- then we would be unable to tell if we set SIG_IGN (and +- thus should reset to SIG_DFL in fuse_remove_signal_handlers) +- or if it was already set to SIG_IGN (and should be left +- untouched. */ +- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) +- return -1; ++ /* ++ * If we used SIG_IGN instead of the do_nothing function, ++ * then we would be unable to tell if we set SIG_IGN (and ++ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ * or if it was already set to SIG_IGN (and should be left ++ * untouched. 
++ */ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { ++ return -1; ++ } + +- fuse_instance = se; +- return 0; ++ fuse_instance = se; ++ return 0; + } + + void fuse_remove_signal_handlers(struct fuse_session *se) + { +- if (fuse_instance != se) +- fuse_log(FUSE_LOG_ERR, +- "fuse: fuse_remove_signal_handlers: unknown session\n"); +- else +- fuse_instance = NULL; ++ if (fuse_instance != se) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ } else { ++ fuse_instance = NULL; ++ } + +- set_one_signal_handler(SIGHUP, exit_handler, 1); +- set_one_signal_handler(SIGINT, exit_handler, 1); +- set_one_signal_handler(SIGTERM, exit_handler, 1); +- set_one_signal_handler(SIGPIPE, do_nothing, 1); ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); + } +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5a2e64c..5711dd2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -1,297 +1,309 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Helper functions to create (simple) standalone programs. With the ++ * aid of these functions it should be possible to create full FUSE ++ * file system by implementing nothing but the request handlers. + +- Helper functions to create (simple) standalone programs. With the +- aid of these functions it should be possible to create full FUSE +- file system by implementing nothing but the request handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. 
+-*/ ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #include "config.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_lowlevel.h" + #include "mount_util.h" + ++#include ++#include ++#include + #include + #include +-#include +-#include + #include +-#include +-#include + #include ++#include + +-#define FUSE_HELPER_OPT(t, p) \ +- { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++#define FUSE_HELPER_OPT(t, p) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), 1 \ ++ } + + static const struct fuse_opt fuse_helper_opts[] = { +- FUSE_HELPER_OPT("-h", show_help), +- FUSE_HELPER_OPT("--help", show_help), +- FUSE_HELPER_OPT("-V", show_version), +- FUSE_HELPER_OPT("--version", show_version), +- FUSE_HELPER_OPT("-d", debug), +- FUSE_HELPER_OPT("debug", debug), +- FUSE_HELPER_OPT("-d", foreground), +- FUSE_HELPER_OPT("debug", foreground), +- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), +- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("fsname=", nodefault_subtype), +- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("subtype=", nodefault_subtype), +- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), +- FUSE_OPT_END ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END + }; + + struct fuse_conn_info_opts { +- int atomic_o_trunc; +- int no_remote_posix_lock; +- int no_remote_flock; +- int splice_write; +- int splice_move; +- int splice_read; +- int no_splice_write; +- int no_splice_move; +- int no_splice_read; +- int auto_inval_data; +- int no_auto_inval_data; +- int no_readdirplus; +- int no_readdirplus_auto; +- int async_dio; +- int no_async_dio; +- int writeback_cache; +- int no_writeback_cache; +- int async_read; +- int sync_read; +- unsigned max_write; +- unsigned max_readahead; +- unsigned max_background; +- unsigned congestion_threshold; +- unsigned time_gran; +- int set_max_write; +- int set_max_readahead; +- int set_max_background; +- int set_congestion_threshold; +- int set_time_gran; ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; + }; + +-#define CONN_OPTION(t, p, v) \ +- { t, offsetof(struct fuse_conn_info_opts, p), v } ++#define CONN_OPTION(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_conn_info_opts, p), v \ ++ } + static const struct fuse_opt conn_info_opt_spec[] = { +- CONN_OPTION("max_write=%u", max_write, 0), +- CONN_OPTION("max_write=", set_max_write, 1), +- CONN_OPTION("max_readahead=%u", max_readahead, 0), +- CONN_OPTION("max_readahead=", set_max_readahead, 1), +- 
CONN_OPTION("max_background=%u", max_background, 0), +- CONN_OPTION("max_background=", set_max_background, 1), +- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), +- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), +- CONN_OPTION("sync_read", sync_read, 1), +- CONN_OPTION("async_read", async_read, 1), +- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), +- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), +- CONN_OPTION("no_remote_lock", no_remote_flock, 1), +- CONN_OPTION("no_remote_flock", no_remote_flock, 1), +- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), +- CONN_OPTION("splice_write", splice_write, 1), +- CONN_OPTION("no_splice_write", no_splice_write, 1), +- CONN_OPTION("splice_move", splice_move, 1), +- CONN_OPTION("no_splice_move", no_splice_move, 1), +- CONN_OPTION("splice_read", splice_read, 1), +- CONN_OPTION("no_splice_read", no_splice_read, 1), +- CONN_OPTION("auto_inval_data", auto_inval_data, 1), +- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), +- CONN_OPTION("readdirplus=no", no_readdirplus, 1), +- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), +- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), +- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), +- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), +- CONN_OPTION("async_dio", async_dio, 1), +- CONN_OPTION("no_async_dio", no_async_dio, 1), +- CONN_OPTION("writeback_cache", writeback_cache, 1), +- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), +- CONN_OPTION("time_gran=%u", time_gran, 0), +- CONN_OPTION("time_gran=", set_time_gran, 1), +- FUSE_OPT_END ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END + }; + + + void fuse_cmdline_help(void) + { +- printf(" -h --help print help\n" +- " -V --version print version\n" +- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf( ++ " -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o 
max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +- struct fuse_args *outargs) ++ struct fuse_args *outargs) + { +- (void) outargs; +- struct fuse_cmdline_opts *opts = data; +- +- switch (key) { +- case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: bad mount point `%s': %s\n", +- arg, strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } +- +- default: +- /* Pass through unknown options */ +- return 1; +- } ++ (void)outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, ++ strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } + } + +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts) ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + { +- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + +- opts->max_idle_threads = 10; ++ opts->max_idle_threads = 10; + +- if (fuse_opt_parse(args, opts, fuse_helper_opts, +- fuse_helper_opt_proc) == -1) +- return -1; ++ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == ++ -1) { ++ return -1; ++ } + +- return 0; ++ return 0; + } + + + int fuse_daemonize(int foreground) + { +- if (!foreground) { +- int nullfd; +- int waiter[2]; +- char completed; +- +- if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); +- return -1; +- } +- +- /* +- * demonize current process by forking it and killing the +- * parent. This makes current process as a child of 'init'. +- */ +- switch(fork()) { +- case -1: +- perror("fuse_daemonize: fork"); +- return -1; +- case 0: +- break; +- default: +- (void) read(waiter[0], &completed, sizeof(completed)); +- _exit(0); +- } +- +- if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); +- return -1; +- } +- +- (void) chdir("/"); +- +- nullfd = open("/dev/null", O_RDWR, 0); +- if (nullfd != -1) { +- (void) dup2(nullfd, 0); +- (void) dup2(nullfd, 1); +- (void) dup2(nullfd, 2); +- if (nullfd > 2) +- close(nullfd); +- } +- +- /* Propagate completion of daemon initialization */ +- completed = 1; +- (void) write(waiter[1], &completed, sizeof(completed)); +- close(waiter[0]); +- close(waiter[1]); +- } else { +- (void) chdir("/"); +- } +- return 0; ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. 
++ */ ++ switch (fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void)read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void)chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void)dup2(nullfd, 0); ++ (void)dup2(nullfd, 1); ++ (void)dup2(nullfd, 2); ++ if (nullfd > 2) { ++ close(nullfd); ++ } ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void)write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void)chdir("/"); ++ } ++ return 0; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn) ++ struct fuse_conn_info *conn) + { +- if(opts->set_max_write) +- conn->max_write = opts->max_write; +- if(opts->set_max_background) +- conn->max_background = opts->max_background; +- if(opts->set_congestion_threshold) +- conn->congestion_threshold = opts->congestion_threshold; +- if(opts->set_time_gran) +- conn->time_gran = opts->time_gran; +- if(opts->set_max_readahead) +- conn->max_readahead = opts->max_readahead; +- +-#define LL_ENABLE(cond,cap) \ +- if (cond) conn->want |= (cap) +-#define LL_DISABLE(cond,cap) \ +- if (cond) conn->want &= ~(cap) +- +- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); +- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); +- +- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); +- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); +- +- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); +- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); +- +- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- +- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); +- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); +- +- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); +- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); +- +- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- +- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); +- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); +- +- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); +- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++ if (opts->set_max_write) { ++ conn->max_write = opts->max_write; ++ } ++ if (opts->set_max_background) { ++ conn->max_background = opts->max_background; ++ } ++ if (opts->set_congestion_threshold) { ++ conn->congestion_threshold = opts->congestion_threshold; ++ } ++ if (opts->set_time_gran) { ++ conn->time_gran = opts->time_gran; ++ } ++ if (opts->set_max_readahead) { ++ conn->max_readahead = opts->max_readahead; ++ } ++ ++#define LL_ENABLE(cond, cap) \ ++ if (cond) \ ++ conn->want |= (cap) ++#define LL_DISABLE(cond, cap) \ ++ if (cond) \ ++ conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); + } + +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) + { +- struct fuse_conn_info_opts *opts; +- +- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); +- if(opts == NULL) { +- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); +- return NULL; +- } +- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { +- free(opts); +- return NULL; +- } +- return opts; ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if (opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; + } +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 7c5f561..0b98275 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -28,23 +28,24 @@ + * operation + */ + static int mknod_wrapper(int dirfd, const char *path, const char *link, +- int mode, dev_t rdev) ++ int mode, dev_t rdev) + { +- int res; ++ int res; + +- if (S_ISREG(mode)) { +- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); +- if (res >= 0) +- res = close(res); +- } else if (S_ISDIR(mode)) { +- res = mkdirat(dirfd, path, mode); +- } else if (S_ISLNK(mode) && link != NULL) { +- res = symlinkat(link, dirfd, path); +- } else if (S_ISFIFO(mode)) { +- res = mkfifoat(dirfd, path, mode); +- } else { +- res = mknodat(dirfd, path, mode, rdev); +- } ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) { ++ res = close(res); ++ } ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if 
(S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } + +- return res; ++ return res; + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e5f7115..c5850ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1,12 +1,12 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU GPLv2. +- See the file COPYING. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU GPLv2. ++ * See the file COPYING. ++ */ + +-/** @file ++/* + * + * This file system mirrors the existing file system hierarchy of the + * system, starting at the root file system. This is implemented by +@@ -28,7 +28,8 @@ + * + * Compile with: + * +- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o ++ * passthrough_ll + * + * ## Source code ## + * \include passthrough_ll.c +@@ -39,1299 +40,1365 @@ + + #include "config.h" + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include ++#include + #include ++#include + #include ++#include + #include ++#include ++#include ++#include ++#include ++#include + #include + #include ++#include + + #include "passthrough_helpers.h" + +-/* We are re-using pointers to our `struct lo_inode` and `struct +- lo_dirp` elements as inodes. This means that we must be able to +- store uintptr_t values in a fuse_ino_t variable. The following +- incantation checks this condition at compile time. 
*/ +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++/* ++ * We are re-using pointers to our `struct lo_inode` and `struct ++ * lo_dirp` elements as inodes. This means that we must be able to ++ * store uintptr_t values in a fuse_ino_t variable. The following ++ * incantation checks this condition at compile time. ++ */ ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); ++ "fuse_ino_t too small to hold uintptr_t values!"); + #else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ +- { unsigned _uintptr_to_must_hold_fuse_ino_t: +- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { ++ unsigned _uintptr_to_must_hold_fuse_ino_t ++ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); ++}; + #endif + + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ +- int fd; +- bool is_symlink; +- ino_t ino; +- dev_t dev; +- uint64_t refcount; /* protected by lo->mutex */ ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, +- CACHE_ALWAYS, ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, + }; + + struct lo_data { +- pthread_mutex_t mutex; +- int debug; +- int writeback; +- int flock; +- int xattr; +- const char *source; +- double timeout; +- int cache; +- int timeout_set; +- struct lo_inode root; /* protected by lo->mutex */ ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ 
struct lo_inode root; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +- { "writeback", +- offsetof(struct lo_data, writeback), 1 }, +- { "no_writeback", +- offsetof(struct lo_data, writeback), 0 }, +- { "source=%s", +- offsetof(struct lo_data, source), 0 }, +- { "flock", +- offsetof(struct lo_data, flock), 1 }, +- { "no_flock", +- offsetof(struct lo_data, flock), 0 }, +- { "xattr", +- offsetof(struct lo_data, xattr), 1 }, +- { "no_xattr", +- offsetof(struct lo_data, xattr), 0 }, +- { "timeout=%lf", +- offsetof(struct lo_data, timeout), 0 }, +- { "timeout=", +- offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", +- offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", +- offsetof(struct lo_data, cache), CACHE_NORMAL }, +- { "cache=always", +- offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- +- FUSE_OPT_END ++ { "writeback", offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", offsetof(struct lo_data, source), 0 }, ++ { "flock", offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "xattr", offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END + }; + + static struct lo_data *lo_data(fuse_req_t req) + { +- return (struct lo_data *) fuse_req_userdata(req); ++ return (struct lo_data *)fuse_req_userdata(req); + } + + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) +- return &lo_data(req)->root; +- else +- return (struct lo_inode *) (uintptr_t) ino; ++ if (ino == 
FUSE_ROOT_ID) { ++ return &lo_data(req)->root; ++ } else { ++ return (struct lo_inode *)(uintptr_t)ino; ++ } + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ return lo_inode(req, ino)->fd; + } + + static bool lo_debug(fuse_req_t req) + { +- return lo_data(req)->debug != 0; ++ return lo_data(req)->debug != 0; + } + +-static void lo_init(void *userdata, +- struct fuse_conn_info *conn) ++static void lo_init(void *userdata, struct fuse_conn_info *conn) + { +- struct lo_data *lo = (struct lo_data*) userdata; +- +- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) +- conn->want |= FUSE_CAP_EXPORT_SUPPORT; +- +- if (lo->writeback && +- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- conn->want |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; +- } ++ struct lo_data *lo = (struct lo_data *)userdata; ++ ++ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ ++ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ } ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ } ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- struct stat buf; +- struct lo_data *lo = lo_data(req); ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); + +- (void) fi; ++ (void)fi; + +- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- 
return (void) fuse_reply_err(req, errno); ++ res = ++ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- fuse_reply_attr(req, &buf, lo->timeout); ++ fuse_reply_attr(req, &buf, lo->timeout); + } + + static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec *tv) ++ const struct timespec *tv) + { +- int res; +- char procname[64]; +- +- if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1 && errno == EINVAL) { +- /* Sorry, no race free way to set times on symlink. */ +- errno = EPERM; +- } +- return res; +- } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- return utimensat(AT_FDCWD, procname, tv, 0); ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); + } + + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int valid, struct fuse_file_info *fi) ++ int valid, struct fuse_file_info *fi) + { +- int saverr; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; +- int res; +- +- if (valid & FUSE_SET_ATTR_MODE) { +- if (fi) { +- res = fchmod(fi->fh, attr->st_mode); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { +- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? +- attr->st_uid : (uid_t) -1; +- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
+- attr->st_gid : (gid_t) -1; +- +- res = fchownat(ifd, "", uid, gid, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- } +- if (valid & FUSE_SET_ATTR_SIZE) { +- if (fi) { +- res = ftruncate(fi->fh, attr->st_size); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { +- struct timespec tv[2]; +- +- tv[0].tv_sec = 0; +- tv[1].tv_sec = 0; +- tv[0].tv_nsec = UTIME_OMIT; +- tv[1].tv_nsec = UTIME_OMIT; +- +- if (valid & FUSE_SET_ATTR_ATIME_NOW) +- tv[0].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_ATIME) +- tv[0] = attr->st_atim; +- +- if (valid & FUSE_SET_ATTR_MTIME_NOW) +- tv[1].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_MTIME) +- tv[1] = attr->st_mtim; +- +- if (fi) +- res = futimens(fi->fh, tv); +- else +- res = utimensat_empty_nofollow(inode, tv); +- if (res == -1) +- goto out_err; +- } +- +- return lo_getattr(req, ino, fi); ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; ++ ++ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) { ++ tv[0].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_ATIME) { ++ tv[0] = attr->st_atim; ++ } ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) { ++ tv[1].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_MTIME) { ++ tv[1] = attr->st_mtim; ++ } ++ ++ if (fi) { ++ res = futimens(fi->fh, tv); ++ } else { ++ res = utimensat_empty_nofollow(inode, tv); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ ++ return lo_getattr(req, ino, fi); + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { +- struct lo_inode *p; +- struct lo_inode *ret = NULL; +- +- pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- } +- } +- pthread_mutex_unlock(&lo->mutex); +- return ret; ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; + } + + static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e) + { +- int newfd; +- int res; +- int saverr; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; +- +- memset(e, 0, sizeof(*e)); +- e->attr_timeout = lo->timeout; +- e->entry_timeout = lo->timeout; +- +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); +- if (newfd == -1) +- goto out_err; +- +- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- inode = lo_find(lo_data(req), &e->attr); +- if (inode) { +- close(newfd); +- newfd = -1; +- } else { +- struct lo_inode *prev, *next; +- +- saverr = ENOMEM; +- inode = calloc(1, sizeof(struct lo_inode)); +- if (!inode) +- goto out_err; +- +- inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; +- inode->fd = newfd; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; +- +- pthread_mutex_lock(&lo->mutex); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; +- pthread_mutex_unlock(&lo->mutex); +- } +- e->ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e->ino); +- +- return 0; ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ if (newfd == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, 
sizeof(struct lo_inode)); ++ if (!inode) { ++ goto out_err; ++ } ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e->ino); ++ } ++ ++ return 0; + + out_err: +- saverr = errno; +- if (newfd != -1) +- close(newfd); +- return saverr; ++ saverr = errno; ++ if (newfd != -1) { ++ close(newfd); ++ } ++ return saverr; + } + + static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_entry(req, &e); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_entry(req, &e); ++ } + } + + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev, +- const char *link) ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) + { +- int res; +- int saverr; +- struct lo_inode *dir = lo_inode(req, parent); +- struct fuse_entry_param e; ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; + +- saverr = ENOMEM; ++ saverr = ENOMEM; + +- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); + +- saverr = errno; +- if (res == -1) +- goto out; ++ saverr = errno; ++ if (res == -1) { ++ goto out; ++ } + +- saverr = lo_do_lookup(req, parent, name, &e); +- if (saverr) +- goto out; ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) { ++ goto out; ++ } + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e.ino); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } + +- fuse_reply_entry(req, &e); +- return; ++ fuse_reply_entry(req, &e); ++ return; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + +-static void lo_mknod(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev) ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev) + { +- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); + } + + static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode) ++ mode_t mode) + { +- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); + } + +-static void lo_symlink(fuse_req_t req, const char *link, +- fuse_ino_t parent, const char *name) ++static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name) + { +- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + + static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++ const char *name) + { +- int res; +- char procname[64]; ++ int res; ++ char procname[64]; + +- if (inode->is_symlink) { +- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); +- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { +- /* Sorry, no race free way to hard-link a symlink. */ +- errno = EPERM; +- } +- return res; +- } ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +- const char *name) ++ const char *name) + { +- int res; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); +- struct fuse_entry_param e; +- int saverr; +- +- memset(&e, 0, sizeof(struct fuse_entry_param)); +- e.attr_timeout = lo->timeout; +- e.entry_timeout = lo->timeout; +- +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); +- if (res == -1) +- goto out_err; +- +- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- pthread_mutex_lock(&lo->mutex); +- inode->refcount++; +- pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, +- (unsigned long long) e.ino); +- +- fuse_reply_entry(req, &e); +- return; ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if 
(res == -1) { ++ goto out_err; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } ++ ++ fuse_reply_entry(req, &e); ++ return; + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags) ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) + { +- int res; ++ int res; + +- if (flags) { +- fuse_reply_err(req, EINVAL); +- return; +- } ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + +- res = renameat(lo_fd(req, parent), name, +- lo_fd(req, newparent), newname); ++ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, 0); ++ res = unlinkat(lo_fd(req, parent), name, 0); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { +- if (!inode) +- return; +- +- pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- +- pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); +- +- } else { +- pthread_mutex_unlock(&lo->mutex); +- } ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } + } + + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long) ino, +- (unsigned long long) inode->refcount, +- (unsigned long long) nlookup); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); ++ } + +- unref_inode(lo, inode, nlookup); ++ unref_inode(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- lo_forget_one(req, ino, nlookup); +- fuse_reply_none(req); ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); + } + + static void lo_forget_multi(fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets) ++ struct fuse_forget_data *forgets) + { +- int 
i; ++ int i; + +- for (i = 0; i < count; i++) +- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); +- fuse_reply_none(req); ++ for (i = 0; i < count; i++) { ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ } ++ fuse_reply_none(req); + } + + static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + { +- char buf[PATH_MAX + 1]; +- int res; ++ char buf[PATH_MAX + 1]; ++ int res; + +- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); +- if (res == -1) +- return (void) fuse_reply_err(req, errno); ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- if (res == sizeof(buf)) +- return (void) fuse_reply_err(req, ENAMETOOLONG); ++ if (res == sizeof(buf)) { ++ return (void)fuse_reply_err(req, ENAMETOOLONG); ++ } + +- buf[res] = '\0'; ++ buf[res] = '\0'; + +- fuse_reply_readlink(req, buf); ++ fuse_reply_readlink(req, buf); + } + + struct lo_dirp { +- DIR *dp; +- struct dirent *entry; +- off_t offset; ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; + }; + + static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) + { +- return (struct lo_dirp *) (uintptr_t) fi->fh; ++ return (struct lo_dirp *)(uintptr_t)fi->fh; + } + +-static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- int error = ENOMEM; +- struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; +- int fd; +- +- d = calloc(1, sizeof(struct lo_dirp)); +- if (d == NULL) +- goto out_err; +- +- fd = openat(lo_fd(req, ino), ".", O_RDONLY); +- if (fd == -1) +- goto out_errno; +- +- d->dp = fdopendir(fd); +- if (d->dp == NULL) +- goto out_errno; +- +- d->offset = 0; +- d->entry = NULL; +- +- fi->fh = (uintptr_t) d; +- if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); +- return; ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int 
fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) { ++ goto out_err; ++ } ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) { ++ goto out_errno; ++ } ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) { ++ goto out_errno; ++ } ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t)d; ++ if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); ++ return; + + out_errno: +- error = errno; ++ error = errno; + out_err: +- if (d) { +- if (fd != -1) +- close(fd); +- free(d); +- } +- fuse_reply_err(req, error); ++ if (d) { ++ if (fd != -1) { ++ close(fd); ++ } ++ free(d); ++ } ++ fuse_reply_err(req, error); + } + + static int is_dot_or_dotdot(const char *name) + { +- return name[0] == '.' && (name[1] == '\0' || +- (name[1] == '.' && name[2] == '\0')); ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); + } + + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi, int plus) ++ off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; +- char *p; +- size_t rem = size; +- int err; +- +- (void) ino; +- +- buf = calloc(1, size); +- if (!buf) { +- err = ENOMEM; +- goto error; +- } +- p = buf; +- +- if (offset != d->offset) { +- seekdir(d->dp, offset); +- d->entry = NULL; +- d->offset = offset; +- } +- while (1) { +- size_t entsize; +- off_t nextoff; +- const char *name; +- +- if (!d->entry) { +- errno = 0; +- d->entry = readdir(d->dp); +- if (!d->entry) { +- if (errno) { // Error +- err = errno; +- goto error; +- } else { // End of stream +- break; +- } +- } +- } +- nextoff = d->entry->d_off; +- name = d->entry->d_name; +- fuse_ino_t entry_ino = 0; +- if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param) { +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { +- 
err = lo_do_lookup(req, ino, name, &e); +- if (err) +- goto error; +- entry_ino = e.ino; +- } +- +- entsize = fuse_add_direntry_plus(req, p, rem, name, +- &e, nextoff); +- } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, +- &st, nextoff); +- } +- if (entsize > rem) { +- if (entry_ino != 0) +- lo_forget_one(req, entry_ino, 1); +- break; +- } +- +- p += entsize; +- rem -= entsize; +- +- d->entry = NULL; +- d->offset = nextoff; +- } ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void)ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { /* Error */ ++ err = errno; ++ goto error; ++ } else { /* End of stream */ ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) { ++ goto error; ++ } ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) { ++ lo_forget_one(req, entry_ino, 1); ++ } ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } + + err = 0; + error: 
+- // If there's an error, we can only signal it if we haven't stored +- // any entries yet - otherwise we'd end up with wrong lookup +- // counts for the entries that are already in the buffer. So we +- // return what we've collected until that point. +- if (err && rem == size) +- fuse_reply_err(req, err); +- else +- fuse_reply_buf(req, buf, size - rem); ++ /* ++ * If there's an error, we can only signal it if we haven't stored ++ * any entries yet - otherwise we'd end up with wrong lookup ++ * counts for the entries that are already in the buffer. So we ++ * return what we've collected until that point. ++ */ ++ if (err && rem == size) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_buf(req, buf, size - rem); ++ } + free(buf); + } + + static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 0); ++ lo_do_readdir(req, ino, size, offset, fi, 0); + } + + static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 1); ++ lo_do_readdir(req, ino, size, offset, fi, 1); + } + +-static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); +- (void) ino; +- closedir(d->dp); +- free(d); +- fuse_reply_err(req, 0); ++ struct lo_dirp *d = lo_dirp(fi); ++ (void)ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); + } + + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi) ++ mode_t mode, struct fuse_file_info *fi) + { +- int fd; +- struct lo_data *lo = lo_data(req); +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- fd = openat(lo_fd(req, parent), name, +- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_create(req, &e, fi); ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ mode); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_create(req, &e, fi); ++ } + } + + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- int fd = dirfd(lo_dirp(fi)->dp); +- (void) ino; +- if (datasync) +- res = fdatasync(fd); +- else +- res = fsync(fd); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fd); ++ } else { ++ res = fsync(fd); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- char buf[64]; +- struct lo_data *lo = lo_data(req); +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", +- ino, fi->flags); +- +- /* With writeback cache, kernel may send read requests even +- when userspace opened write-only */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* With writeback cache, O_APPEND is handled by the kernel. +- This breaks atomicity (since the file may change in the +- underlying filesystem, so that the kernel's idea of the +- end of the file isn't accurate anymore). In this example, +- we just accept that. A more rigorous filesystem may want +- to return an error here */ +- if (lo->writeback && (fi->flags & O_APPEND)) +- fi->flags &= ~O_APPEND; +- +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); ++ } ++ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. 
A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (lo->writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); + } + +-static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_release(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- (void) ino; ++ (void)ino; + +- close(fi->fh); +- fuse_reply_err(req, 0); ++ close(fi->fh); ++ fuse_reply_err(req, 0); + } + + static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- res = close(dup(fi->fh)); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- if (datasync) +- res = fdatasync(fi->fh); +- else +- res = fsync(fi->fh); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fi->fh); ++ } else { ++ res = fsync(fi->fh); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, ++ struct fuse_file_info *fi) + { +- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", ino, size, (unsigned long) offset); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); ++ } + +- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; +- buf.buf[0].pos = offset; ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *in_buf, off_t off, +- struct fuse_file_info *fi) ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) + { +- (void) ino; +- ssize_t res; +- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); +- +- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; +- out_buf.buf[0].pos = off; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", +- ino, out_buf.buf[0].size, (unsigned long) off); +- +- res = fuse_buf_copy(&out_buf, in_buf, 0); +- if(res < 0) +- fuse_reply_err(req, -res); +- else +- fuse_reply_write(req, (size_t) res); ++ (void)ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); ++ } ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if (res < 0) { ++ fuse_reply_err(req, -res); ++ } else { ++ fuse_reply_write(req, (size_t)res); ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) + { +- int res; +- struct statvfs stbuf; +- +- res = fstatvfs(lo_fd(req, ino), &stbuf); +- if (res == -1) +- fuse_reply_err(req, errno); +- else +- fuse_reply_statfs(req, &stbuf); ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ fuse_reply_statfs(req, &stbuf); ++ } + } + +-static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi) ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi) + { +- int err = EOPNOTSUPP; +- (void) ino; ++ int err = EOPNOTSUPP; ++ (void)ino; + + #ifdef HAVE_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); +- if (err < 0) +- err = errno; ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) { ++ err = errno; ++ } + + #elif defined(HAVE_POSIX_FALLOCATE) +- if (mode) { +- fuse_reply_err(req, EOPNOTSUPP); +- return; +- } ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(fi->fh, offset, length); + #endif + +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + + static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- int op) ++ int op) + { +- int res; +- (void) ino; ++ int res; ++ (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(fi->fh, op); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size) ++ size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", +- ino, name, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = getxattr(procname, name, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = getxattr(procname, name, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, ++ size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = listxattr(procname, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = listxattr(procname, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags) ++ const char *value, size_t size, int flags) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if 
(!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = setxattr(procname, name, value, size, flags); +- saverr = ret == -1 ? errno : 0; ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = removexattr(procname, name); +- saverr = ret == -1 ? errno : 0; ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + #ifdef HAVE_COPY_FILE_RANGE + static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, +- struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags) ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags) + { +- ssize_t res; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, +- len, flags); +- +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, +- flags); +- if (res < 0) +- fuse_reply_err(req, -errno); +- else +- fuse_reply_write(req, res); ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, ++ flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ if (res < 0) { ++ fuse_reply_err(req, -errno); ++ } else { ++ fuse_reply_write(req, res); ++ } + } + #endif + + static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- off_t res; +- +- (void)ino; +- res = lseek(fi->fh, off, whence); +- if (res != -1) +- fuse_reply_lseek(req, res); +- else +- fuse_reply_err(req, errno); ++ off_t res; ++ ++ (void)ino; ++ res = 
lseek(fi->fh, off, whence); ++ if (res != -1) { ++ fuse_reply_lseek(req, res); ++ } else { ++ fuse_reply_err(req, errno); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +- .init = lo_init, +- .lookup = lo_lookup, +- .mkdir = lo_mkdir, +- .mknod = lo_mknod, +- .symlink = lo_symlink, +- .link = lo_link, +- .unlink = lo_unlink, +- .rmdir = lo_rmdir, +- .rename = lo_rename, +- .forget = lo_forget, +- .forget_multi = lo_forget_multi, +- .getattr = lo_getattr, +- .setattr = lo_setattr, +- .readlink = lo_readlink, +- .opendir = lo_opendir, +- .readdir = lo_readdir, +- .readdirplus = lo_readdirplus, +- .releasedir = lo_releasedir, +- .fsyncdir = lo_fsyncdir, +- .create = lo_create, +- .open = lo_open, +- .release = lo_release, +- .flush = lo_flush, +- .fsync = lo_fsync, +- .read = lo_read, +- .write_buf = lo_write_buf, +- .statfs = lo_statfs, +- .fallocate = lo_fallocate, +- .flock = lo_flock, +- .getxattr = lo_getxattr, +- .listxattr = lo_listxattr, +- .setxattr = lo_setxattr, +- .removexattr = lo_removexattr, ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, + #ifdef HAVE_COPY_FILE_RANGE +- .copy_file_range = lo_copy_file_range, ++ .copy_file_range = lo_copy_file_range, + 
#endif +- .lseek = lo_lseek, ++ .lseek = lo_lseek, + }; + + int main(int argc, char *argv[]) + { +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse_session *se; +- struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, +- .writeback = 0 }; +- int ret = -1; +- +- /* Don't mask creation mode, kernel already did that */ +- umask(0); +- +- pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; +- lo.root.fd = -1; +- lo.cache = CACHE_NORMAL; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- if (opts.show_help) { +- printf("usage: %s [options] \n\n", argv[0]); +- fuse_cmdline_help(); +- fuse_lowlevel_help(); +- ret = 0; +- goto err_out1; +- } else if (opts.show_version) { +- fuse_lowlevel_version(); +- ret = 0; +- goto err_out1; +- } +- +- if(opts.mountpoint == NULL) { +- printf("usage: %s [options] \n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- +- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) +- return 1; +- +- lo.debug = opts.debug; +- lo.root.refcount = 2; +- if (lo.source) { +- struct stat stat; +- int res; +- +- res = lstat(lo.source, &stat); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", +- lo.source); +- exit(1); +- } +- if (!S_ISDIR(stat.st_mode)) { +- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); +- exit(1); +- } +- +- } else { +- lo.source = "/"; +- } +- lo.root.is_symlink = false; +- if (!lo.timeout_set) { +- switch (lo.cache) { +- case CACHE_NEVER: +- lo.timeout = 0.0; +- break; +- +- case CACHE_NORMAL: +- lo.timeout = 1.0; +- break; +- +- case CACHE_ALWAYS: +- lo.timeout = 86400.0; +- break; +- } +- } else if (lo.timeout < 0) { +- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", +- lo.timeout); +- exit(1); +- } +- +- lo.root.fd = open(lo.source, O_PATH); +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", +- lo.source); +- exit(1); +- } +- +- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); +- if (se == NULL) +- goto err_out1; +- +- if (fuse_set_signal_handlers(se) != 0) +- goto err_out2; +- +- if (fuse_session_mount(se, opts.mountpoint) != 0) +- goto err_out3; +- +- fuse_daemonize(opts.foreground); +- +- /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) +- ret = fuse_session_loop(se); +- else +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- +- fuse_session_unmount(se); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) { ++ return 1; ++ } ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if (opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { ++ return 1; ++ } ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 
1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) { ++ goto err_out1; ++ } ++ ++ if (fuse_set_signal_handlers(se) != 0) { ++ goto err_out2; ++ } ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ goto err_out3; ++ } ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) { ++ ret = fuse_session_loop(se); ++ } else { ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ } ++ ++ fuse_session_unmount(se); + err_out3: +- fuse_remove_signal_handlers(se); ++ fuse_remove_signal_handlers(se); + err_out2: +- fuse_session_destroy(se); ++ fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); + +- if (lo.root.fd >= 0) +- close(lo.root.fd); ++ if (lo.root.fd >= 0) { ++ close(lo.root.fd); ++ } + +- return ret ? 1 : 0; ++ return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch b/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch new file mode 100644 index 0000000..8888030 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch @@ -0,0 +1,65 @@ +From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:41 +0100 +Subject: [PATCH 070/116] virtiofsd: Handle hard reboot +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-67-dgilbert@redhat.com> +Patchwork-id: 93521 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Handle a + mount + hard reboot (without unmount) + mount + +we get another 'init' which FUSE doesn't normally expect. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7d742b5..65f91da 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, + goto reply_err; + } + } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { +- goto reply_err; ++ if (fuse_lowlevel_is_virtio(se)) { ++ /* ++ * TODO: This is after a hard reboot typically, we need to do ++ * a destroy, but we can't reply to this request yet so ++ * we can't use do_destroy ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); ++ se->got_destroy = 1; ++ se->got_init = 0; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } else { ++ goto reply_err; ++ } + } + + err = EACCES; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Handle-reinit.patch b/SOURCES/kvm-virtiofsd-Handle-reinit.patch new file mode 100644 index 0000000..3f9577b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Handle-reinit.patch @@ -0,0 +1,53 @@ +From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:40 +0100 +Subject: [PATCH 069/116] virtiofsd: Handle reinit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-66-dgilbert@redhat.com> +Patchwork-id: 93520 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Allow init->destroy->init for mount->umount->mount + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index a7a1968..7d742b5 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + + se->got_init = 1; ++ se->got_destroy = 0; + if (se->op.init) { + se->op.init(se->userdata, &se->conn); + } +@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + (void)iter; + + se->got_destroy = 1; ++ se->got_init = 0; + if (se->op.destroy) { + se->op.destroy(se->userdata); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch b/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch new file mode 100644 index 0000000..18be3e0 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch @@ -0,0 +1,116 @@ +From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:04 +0100 +Subject: [PATCH 033/116] virtiofsd: Keep track of replies +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-30-dgilbert@redhat.com> +Patchwork-id: 93481 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Keep track of whether we sent a reply to a request; this is a bit +paranoid but it means: + a) We should always recycle an element even if there was an error + in the request + b) Never try and send two replies on one queue element + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05d0e29..f1adeb6 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -44,6 +44,7 @@ struct fv_QueueInfo { + + /* The element for the command currently being processed */ + VuVirtqElement *qe; ++ bool reply_sent; + }; + + /* +@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + { + VuVirtqElement *elem; + VuVirtq *q; ++ int ret = 0; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + assert(out->unique); + /* For virtio we always have ch */ + assert(ch); ++ assert(!ch->qi->reply_sent); + elem = ch->qi->qe; + q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; + +@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + if (in_len < sizeof(struct fuse_out_header)) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", + __func__, elem->index); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + if (in_len < tosend_len) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", + __func__, elem->index, tosend_len); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); + vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); + vu_queue_notify(&se->virtio_dev->dev, q); ++ ch->qi->reply_sent = true; + +- return 0; ++err: ++ return ret; + } + + /* Thread function for individual queues, created when a queue is 'started' */ +@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) + break; + } + ++ qi->qe = elem; ++ qi->reply_sent = false; ++ + if (!fbuf.mem) { + fbuf.mem = malloc(se->bufsize); + assert(fbuf.mem); +@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + ++ if (!qi->reply_sent) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", ++ __func__, elem->index); ++ /* I think we've still got to recycle the element */ ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ } + qi->qe = NULL; + free(elem); + elem = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch new file mode 100644 index 0000000..5e054f3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch @@ -0,0 +1,143 @@ +From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:42 +0100 +Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-68-dgilbert@redhat.com> +Patchwork-id: 93522 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Kill the threads we've started when the queues get stopped. + +Signed-off-by: Dr. David Alan Gilbert +With improvements by: +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 44 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 872968f..7a8774a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,7 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ int kill_fd; /* For killing the thread */ + + /* The element for the command currently being processed */ + VuVirtqElement *qe; +@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- struct pollfd pf[1]; ++ struct pollfd pf[2]; + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; ++ pf[1].fd = qi->kill_fd; ++ pf[1].events = POLLIN; ++ pf[1].revents = 0; + + fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, + qi->qidx); +- int poll_res = ppoll(pf, 1, 
NULL, NULL); ++ int poll_res = ppoll(pf, 2, NULL, NULL); + + if (poll_res == -1) { + if (errno == EINTR) { +@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); + break; + } +- assert(poll_res == 1); ++ assert(poll_res >= 1); + if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { + fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", + __func__, pf[0].revents, qi->qidx); + break; + } ++ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: Unexpected poll revents %x Queue %d killfd\n", ++ __func__, pf[1].revents, qi->qidx); ++ break; ++ } ++ if (pf[1].revents) { ++ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", ++ __func__, qi->qidx); ++ break; ++ } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, + qi->qidx); +@@ -589,6 +604,28 @@ out: + return NULL; + } + ++static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) ++{ ++ int ret; ++ struct fv_QueueInfo *ourqi; ++ ++ assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ++ /* Kill the thread */ ++ if (eventfd_write(ourqi->kill_fd, 1)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", ++ qidx, strerror(errno)); ++ } ++ ret = pthread_join(ourqi->thread, NULL); ++ if (ret) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", ++ __func__, qidx, ret); ++ } ++ close(ourqi->kill_fd); ++ ourqi->kick_fd = -1; ++} ++ + /* Callback from libvhost-user on start or stop of a queue */ + static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + { +@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + } + ourqi = vud->qi[qidx]; + ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ ++ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); ++ assert(ourqi->kill_fd != -1); + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); + assert(0); + } + } else { +- /* TODO: Kill the thread */ +- assert(qidx < vud->nqueues); +- ourqi = vud->qi[qidx]; +- ourqi->kick_fd = -1; ++ fv_queue_cleanup_thread(vud, qidx); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch new file mode 100644 index 0000000..98211cb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch @@ -0,0 +1,96 @@ +From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:53 +0100 +Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is + passed in +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-19-dgilbert@redhat.com> +Patchwork-id: 93472 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If caller has not sent file handle in request, then using inode, retrieve +the fd opened using O_PATH and use that to open file again and issue +fsync. This will be needed when dax_flush() calls fsync. At that time +we only have inode information (and not file). + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 6 +++++- + tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- + 2 files changed, 31 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 514d79c..8552cfb 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + + if (req->se->op.fsync) { +- req->se->op.fsync(req, nodeid, datasync, &fi); ++ if (fi.fh == (uint64_t)-1) { ++ req->se->op.fsync(req, nodeid, datasync, NULL); ++ } else { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } + } else { + fuse_reply_err(req, ENOSYS); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6c4da18..26ac870 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + { + int res; + (void)ino; ++ int fd; ++ char *buf; ++ ++ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, ++ (void *)fi); ++ ++ if (!fi) { ++ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fd = open(buf, O_RDWR); ++ free(buf); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ } else { ++ fd = fi->fh; ++ } ++ + if (datasync) { +- res = fdatasync(fi->fh); ++ res = fdatasync(fd); + } else { +- res = fsync(fi->fh); ++ res = fsync(fd); ++ } ++ if (!fi) { ++ close(fd); + } + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch new file mode 100644 index 0000000..2c9874d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch @@ -0,0 +1,257 @@ +From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:56 +0100 +Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-22-dgilbert@redhat.com> +Patchwork-id: 93476 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When run with vhost-user options we connect to the QEMU instead +via a socket. Start this off by creating the socket. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 7 ++-- + tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ + 4 files changed, 114 insertions(+), 50 deletions(-) + create mode 100644 tools/virtiofsd/fuse_virtio.c + create mode 100644 tools/virtiofsd/fuse_virtio.h + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 26b1a7d..82d6ac7 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,9 +6,10 @@ + * See the file COPYING.LIB + */ + +-#define FUSE_USE_VERSION 31 +- ++#ifndef FUSE_I_H ++#define FUSE_I_H + ++#define FUSE_USE_VERSION 31 + #include "fuse.h" + #include "fuse_lowlevel.h" + +@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ ++#endif +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 17e8718..5df124e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -14,6 +14,7 @@ + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + ++ if (!se->vu_socket_path) { ++ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ goto out4; ++ } ++ + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; + + list_init_req(&se->list); +@@ -2224,54 +2230,7 @@ out1: + + int fuse_session_mount(struct fuse_session *se) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. 
+- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) { +- close(fd); +- } +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) { +- return -1; +- } +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) { +- goto error_out; +- } +- +- return 0; +- +-error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ return virtio_session_mount(se); + } + + int fuse_session_fd(struct fuse_session *se) +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +new file mode 100644 +index 0000000..cbef6ff +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -0,0 +1,79 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#include "fuse_i.h" ++#include "standard-headers/linux/fuse.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_virtio.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* From spec */ ++struct virtio_fs_config { ++ char tag[36]; ++ uint32_t num_queues; ++}; ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ struct sockaddr_un un; ++ mode_t old_umask; ++ ++ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); ++ return -1; ++ } ++ ++ se->fd = -1; ++ ++ /* ++ * Create the Unix socket to communicate with qemu ++ * based on QEMU's vhost-user-bridge ++ */ ++ unlink(se->vu_socket_path); ++ strcpy(un.sun_path, se->vu_socket_path); ++ size_t addr_len = sizeof(un); ++ ++ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (listen_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); ++ return -1; ++ } ++ un.sun_family = AF_UNIX; ++ ++ /* ++ * Unfortunately bind doesn't let you set the mask on the socket, ++ * so set umask to 077 and restore it later. ++ */ ++ old_umask = umask(0077); ++ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ umask(old_umask); ++ return -1; ++ } ++ umask(old_umask); ++ ++ if (listen(listen_sock, 1) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ return -1; ++ } ++ ++ return -1; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +new file mode 100644 +index 0000000..8f2edb6 +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -0,0 +1,23 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#ifndef FUSE_VIRTIO_H ++#define FUSE_VIRTIO_H ++ ++#include "fuse_i.h" ++ ++struct fuse_session; ++ ++int virtio_session_mount(struct fuse_session *se); ++ ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch new file mode 100644 index 0000000..8d8de78 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch @@ -0,0 +1,76 @@ +From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:31 +0100 +Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-57-dgilbert@redhat.com> +Patchwork-id: 93505 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it +to the filesystem. + +Signed-off-by: Vivek Goyal +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_common.h | 6 +++++- + tools/virtiofsd/fuse_lowlevel.c | 4 +++- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index f8f6433..686c42c 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -93,8 +93,12 @@ struct fuse_file_info { + */ + unsigned int cache_readdir:1; + ++ /* Indicates that suid/sgid bits should be removed upon write */ ++ unsigned int kill_priv:1; ++ ++ + /** Padding. Reserved for future use*/ +- unsigned int padding:25; ++ unsigned int padding:24; + unsigned int padding2:32; + + /* +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 02e1d83..2d6dc5a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; + fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + if (ibufv->count == 1) { + assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch new file mode 100644 index 0000000..7d095c9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch @@ -0,0 +1,140 @@ 
+From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:20 +0100 +Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-46-dgilbert@redhat.com> +Patchwork-id: 93497 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pass the write iov pointing to guest RAM all the way through rather +than copying the data. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Xiao Yang +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 73 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fd588a4..872968f 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + + while (1) { ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ + /* + * An element contains one request and the space to send our + * response They're spread over multiple descriptors in a +@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) + __func__, elem->index); + assert(0); /* TODO */ + } +- copy_from_iov(&fbuf, out_num, out_sg); +- fbuf.size = out_len; ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct 
fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header after the fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ allocated_bufv = true; ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ vu_queue_unpop(dev, q, elem, 0); ++ free(elem); ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; + +- /* TODO! Endianness of header */ ++ /* TODO! 
Endianness of header */ + +- /* TODO: Add checks for fuse_session_exited */ +- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; +- fuse_session_process_buf_int(se, &bufv, &ch); ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &ch); ++ ++ if (allocated_bufv) { ++ free(pbufv); ++ } + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) + elem = NULL; + } + } ++out: + pthread_mutex_destroy(&ch.lock); + free(fbuf.mem); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch new file mode 100644 index 0000000..834ced1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch @@ -0,0 +1,168 @@ +From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:19 +0100 +Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-45-dgilbert@redhat.com> +Patchwork-id: 93499 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Let fuse_session_process_buf_int take a fuse_bufvec * instead of a +fuse_buf; and then through to do_write_buf - where in the best +case it can pass that straight through to op.write_buf without copying +(other than skipping a header). + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 +- + tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- + tools/virtiofsd/fuse_virtio.c | 3 +- + 3 files changed, 44 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 45995f3..a20854f 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch); + + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 95f4db8..7e10995 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) ++ struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, ++ struct fuse_bufvec *pbufv = ibufv; ++ struct fuse_bufvec tmpbufv = { ++ .buf[0] = ibufv->buf[0], + .count = 1, + }; + struct fuse_write_in *arg = (struct fuse_write_in *)inarg; +@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; ++ if (ibufv->count == 1) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ tmpbufv.buf[0].mem = PARAM(arg); ++ } ++ tmpbufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ if (tmpbufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ tmpbufv.buf[0].size = arg->size; ++ pbufv = &tmpbufv; ++ } else { ++ /* ++ * Input bufv contains the headers in the first element ++ * and the data in the rest, we need to skip that first element ++ */ ++ ibufv->buf[0].size = 0; + } +- bufv.buf[0].size = arg->size; + +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, NULL); + } + ++/* ++ * Restriction: ++ * bufv is normally a single entry buffer, except for a write ++ * where (if it's in memory) then the bufv may be multiple entries, ++ * where the first entry contains all headers and subsequent entries ++ * contain data ++ * bufv shall not use any offsets etc to make the data anything ++ * other than contiguous starting from 0. 
++ */ + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch) + { ++ const struct fuse_buf *buf = bufv->buf; + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, buf); ++ do_write_buf(req, in->nodeid, inarg, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 635f877..fd588a4 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) + /* TODO! Endianness of header */ + + /* TODO: Add checks for fuse_session_exited */ +- fuse_session_process_buf_int(se, &fbuf, &ch); ++ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, &ch); + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch new file mode 100644 index 0000000..d7c6c0a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch @@ -0,0 +1,97 @@ +From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:01 +0100 +Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-27-dgilbert@redhat.com> +Patchwork-id: 93483 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +In the queue thread poll the kick_fd we're passed. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 39 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2a94bb3..05e7258 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- /* TODO */ ++ struct pollfd pf[1]; ++ pf[0].fd = qi->kick_fd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, ++ qi->qidx); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", ++ __func__, pf[0].revents, qi->qidx); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, ++ qi->qidx); ++ ++ eventfd_t evalue; ++ if (eventfd_read(qi->kick_fd, &evalue)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); ++ break; ++ } ++ if (qi->virtio_dev->se->debug) { ++ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, ++ qi->qidx, (size_t)evalue); ++ } + } + + return NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch new file mode 100644 index 0000000..d4e1ea1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch @@ -0,0 +1,144 @@ +From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:59 +0100 +Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same + vhost_user_socket +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-85-dgilbert@redhat.com> +Patchwork-id: 93541 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd can run multiply even if the vhost_user_socket is same path. + + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [1] 244965 + virtio_session_mount: Waiting for vhost-user socket connection... + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [2] 244966 + virtio_session_mount: Waiting for vhost-user socket connection... 
+ ]# + +The user will get confused about the situation and maybe the cause of the +unexpected problem. So it's better to prevent the multiple running. + +Create a regular file under localstatedir directory to exclude the +vhost_user_socket. To create and lock the file, use qemu_write_pidfile() +because the API has some sanity checks and file lock. + +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert + Applied fixes from Stefan's review and moved osdep include +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 1 + + tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 440508a..aac282f 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include + #include + #include +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index e7bd772..b7948de 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -13,11 +13,12 @@ + + #include "qemu/osdep.h" + #include "qemu/iov.h" +-#include "fuse_virtio.h" ++#include "qapi/error.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + ++static void strreplace(char *s, char old, char new) ++{ ++ for (; *s; ++s) { ++ if (*s == old) { ++ *s = new; ++ } ++ } ++} ++ ++static bool fv_socket_lock(struct fuse_session *se) ++{ ++ g_autofree gchar *sk_name = NULL; ++ g_autofree gchar *pidfile = NULL; ++ g_autofree gchar *dir = NULL; ++ 
Error *local_err = NULL; ++ ++ dir = qemu_get_local_state_pathname("run/virtiofsd"); ++ ++ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", ++ __func__, dir, strerror(errno)); ++ return false; ++ } ++ ++ sk_name = g_strdup(se->vu_socket_path); ++ strreplace(sk_name, '/', '.'); ++ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); ++ ++ if (!qemu_write_pidfile(pidfile, &local_err)) { ++ error_report_err(local_err); ++ return false; ++ } ++ ++ return true; ++} ++ + static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) + return -1; + } + ++ if (!strlen(se->vu_socket_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); ++ return -1; ++ } ++ ++ /* Check the vu_socket_path is already used */ ++ if (!fv_socket_lock(se)) { ++ return -1; ++ } ++ + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch new file mode 100644 index 0000000..f30f23a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch @@ -0,0 +1,945 @@ +From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:40 +0100 +Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-6-dgilbert@redhat.com> +Patchwork-id: 93460 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. 
David Alan Gilbert" + +Update scripts/update-linux-headers.sh to add fuse.h and +use it to pull in fuse.h from the kernel; from v5.5-rc1 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) +Signed-off-by: Miroslav Rezanina +--- + include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ + scripts/update-linux-headers.sh | 1 + + 2 files changed, 892 insertions(+) + create mode 100644 include/standard-headers/linux/fuse.h + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +new file mode 100644 +index 0000000..f4df0a4 +--- /dev/null ++++ b/include/standard-headers/linux/fuse.h +@@ -0,0 +1,891 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ ++/* ++ This file defines the kernel interface of FUSE ++ Copyright (C) 2001-2008 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPL. ++ See the file COPYING. ++ ++ This -- and only this -- header file may also be distributed under ++ the terms of the BSD Licence as follows: ++ ++ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. 
++ ++ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE ++ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ SUCH DAMAGE. ++*/ ++ ++/* ++ * This file defines the kernel interface of FUSE ++ * ++ * Protocol changelog: ++ * ++ * 7.1: ++ * - add the following messages: ++ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, ++ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, ++ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, ++ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, ++ * FUSE_RELEASEDIR ++ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels ++ * ++ * 7.2: ++ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags ++ * - add FUSE_FSYNCDIR message ++ * ++ * 7.3: ++ * - add FUSE_ACCESS message ++ * - add FUSE_CREATE message ++ * - add filehandle to fuse_setattr_in ++ * ++ * 7.4: ++ * - add frsize to fuse_kstatfs ++ * - clean up request size limit checking ++ * ++ * 7.5: ++ * - add flags and max_write to fuse_init_out ++ * ++ * 7.6: ++ * - add max_readahead to fuse_init_in and fuse_init_out ++ * ++ * 7.7: ++ * - add FUSE_INTERRUPT message ++ * - add POSIX file lock support ++ * ++ * 7.8: ++ * - add lock_owner and flags fields to fuse_release_in ++ * - add FUSE_BMAP message ++ * - add FUSE_DESTROY message ++ * ++ * 7.9: ++ * - new 
fuse_getattr_in input argument of GETATTR ++ * - add lk_flags in fuse_lk_in ++ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in ++ * - add blksize field to fuse_attr ++ * - add file flags field to fuse_read_in and fuse_write_in ++ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in ++ * ++ * 7.10 ++ * - add nonseekable open flag ++ * ++ * 7.11 ++ * - add IOCTL message ++ * - add unsolicited notification support ++ * - add POLL message and NOTIFY_POLL notification ++ * ++ * 7.12 ++ * - add umask flag to input argument of create, mknod and mkdir ++ * - add notification messages for invalidation of inodes and ++ * directory entries ++ * ++ * 7.13 ++ * - make max number of background requests and congestion threshold ++ * tunables ++ * ++ * 7.14 ++ * - add splice support to fuse device ++ * ++ * 7.15 ++ * - add store notify ++ * - add retrieve notify ++ * ++ * 7.16 ++ * - add BATCH_FORGET request ++ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct ++ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' ++ * - add FUSE_IOCTL_32BIT flag ++ * ++ * 7.17 ++ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK ++ * ++ * 7.18 ++ * - add FUSE_IOCTL_DIR flag ++ * - add FUSE_NOTIFY_DELETE ++ * ++ * 7.19 ++ * - add FUSE_FALLOCATE ++ * ++ * 7.20 ++ * - add FUSE_AUTO_INVAL_DATA ++ * ++ * 7.21 ++ * - add FUSE_READDIRPLUS ++ * - send the requested events in POLL request ++ * ++ * 7.22 ++ * - add FUSE_ASYNC_DIO ++ * ++ * 7.23 ++ * - add FUSE_WRITEBACK_CACHE ++ * - add time_gran to fuse_init_out ++ * - add reserved space to fuse_init_out ++ * - add FATTR_CTIME ++ * - add ctime and ctimensec to fuse_setattr_in ++ * - add FUSE_RENAME2 request ++ * - add FUSE_NO_OPEN_SUPPORT flag ++ * ++ * 7.24 ++ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support ++ * ++ * 7.25 ++ * - add FUSE_PARALLEL_DIROPS ++ * ++ * 7.26 ++ * - add FUSE_HANDLE_KILLPRIV ++ * - add FUSE_POSIX_ACL ++ * ++ * 7.27 ++ * - add FUSE_ABORT_ERROR ++ * ++ * 7.28 ++ * - 
add FUSE_COPY_FILE_RANGE ++ * - add FOPEN_CACHE_DIR ++ * - add FUSE_MAX_PAGES, add max_pages to init_out ++ * - add FUSE_CACHE_SYMLINKS ++ * ++ * 7.29 ++ * - add FUSE_NO_OPENDIR_SUPPORT flag ++ * ++ * 7.30 ++ * - add FUSE_EXPLICIT_INVAL_DATA ++ * - add FUSE_IOCTL_COMPAT_X32 ++ * ++ * 7.31 ++ * - add FUSE_WRITE_KILL_PRIV flag ++ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING ++ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag ++ */ ++ ++#ifndef _LINUX_FUSE_H ++#define _LINUX_FUSE_H ++ ++#include ++ ++/* ++ * Version negotiation: ++ * ++ * Both the kernel and userspace send the version they support in the ++ * INIT request and reply respectively. ++ * ++ * If the major versions match then both shall use the smallest ++ * of the two minor versions for communication. ++ * ++ * If the kernel supports a larger major version, then userspace shall ++ * reply with the major version it supports, ignore the rest of the ++ * INIT message and expect a new INIT message from the kernel with a ++ * matching major version. ++ * ++ * If the library supports a larger major version, then it shall fall ++ * back to the major protocol version sent by the kernel for ++ * communication and reply with that major version (and an arbitrary ++ * supported minor version). 
++ */ ++ ++/** Version number of this interface */ ++#define FUSE_KERNEL_VERSION 7 ++ ++/** Minor version number of this interface */ ++#define FUSE_KERNEL_MINOR_VERSION 31 ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/* Make sure all structures are padded to 64bit boundary, so 32bit ++ userspace works under 64bit kernels */ ++ ++struct fuse_attr { ++ uint64_t ino; ++ uint64_t size; ++ uint64_t blocks; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t nlink; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t rdev; ++ uint32_t blksize; ++ uint32_t padding; ++}; ++ ++struct fuse_kstatfs { ++ uint64_t blocks; ++ uint64_t bfree; ++ uint64_t bavail; ++ uint64_t files; ++ uint64_t ffree; ++ uint32_t bsize; ++ uint32_t namelen; ++ uint32_t frsize; ++ uint32_t padding; ++ uint32_t spare[6]; ++}; ++ ++struct fuse_file_lock { ++ uint64_t start; ++ uint64_t end; ++ uint32_t type; ++ uint32_t pid; /* tgid */ ++}; ++ ++/** ++ * Bitmasks for fuse_setattr_in.valid ++ */ ++#define FATTR_MODE (1 << 0) ++#define FATTR_UID (1 << 1) ++#define FATTR_GID (1 << 2) ++#define FATTR_SIZE (1 << 3) ++#define FATTR_ATIME (1 << 4) ++#define FATTR_MTIME (1 << 5) ++#define FATTR_FH (1 << 6) ++#define FATTR_ATIME_NOW (1 << 7) ++#define FATTR_MTIME_NOW (1 << 8) ++#define FATTR_LOCKOWNER (1 << 9) ++#define FATTR_CTIME (1 << 10) ++ ++/** ++ * Flags returned by the OPEN request ++ * ++ * FOPEN_DIRECT_IO: bypass page cache for this open file ++ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open ++ * FOPEN_NONSEEKABLE: the file is not seekable ++ * FOPEN_CACHE_DIR: allow caching this directory ++ * FOPEN_STREAM: the file is stream-like (no file position at all) ++ */ ++#define FOPEN_DIRECT_IO (1 << 0) ++#define FOPEN_KEEP_CACHE (1 << 1) ++#define FOPEN_NONSEEKABLE (1 << 2) ++#define FOPEN_CACHE_DIR (1 << 3) ++#define FOPEN_STREAM (1 << 4) ++ ++/** ++ * INIT 
request/reply flags ++ * ++ * FUSE_ASYNC_READ: asynchronous read requests ++ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks ++ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) ++ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem ++ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." ++ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB ++ * FUSE_DONT_MASK: don't apply umask to file mode on create operations ++ * FUSE_SPLICE_WRITE: kernel supports splice write on the device ++ * FUSE_SPLICE_MOVE: kernel supports splice move on the device ++ * FUSE_SPLICE_READ: kernel supports splice read on the device ++ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks ++ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories ++ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages ++ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) ++ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus ++ * FUSE_ASYNC_DIO: asynchronous direct I/O submission ++ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes ++ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens ++ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir ++ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc ++ * FUSE_POSIX_ACL: filesystem supports posix acls ++ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED ++ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages ++ * FUSE_CACHE_SYMLINKS: cache READLINK responses ++ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir ++ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request ++ * FUSE_MAP_ALIGNMENT: map_alignment field is valid ++ */ ++#define FUSE_ASYNC_READ (1 << 0) ++#define FUSE_POSIX_LOCKS (1 << 1) ++#define FUSE_FILE_OPS (1 << 2) ++#define FUSE_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_EXPORT_SUPPORT (1 << 4) ++#define 
FUSE_BIG_WRITES (1 << 5) ++#define FUSE_DONT_MASK (1 << 6) ++#define FUSE_SPLICE_WRITE (1 << 7) ++#define FUSE_SPLICE_MOVE (1 << 8) ++#define FUSE_SPLICE_READ (1 << 9) ++#define FUSE_FLOCK_LOCKS (1 << 10) ++#define FUSE_HAS_IOCTL_DIR (1 << 11) ++#define FUSE_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_DO_READDIRPLUS (1 << 13) ++#define FUSE_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_ASYNC_DIO (1 << 15) ++#define FUSE_WRITEBACK_CACHE (1 << 16) ++#define FUSE_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_PARALLEL_DIROPS (1 << 18) ++#define FUSE_HANDLE_KILLPRIV (1 << 19) ++#define FUSE_POSIX_ACL (1 << 20) ++#define FUSE_ABORT_ERROR (1 << 21) ++#define FUSE_MAX_PAGES (1 << 22) ++#define FUSE_CACHE_SYMLINKS (1 << 23) ++#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) ++#define FUSE_MAP_ALIGNMENT (1 << 26) ++ ++/** ++ * CUSE INIT request/reply flags ++ * ++ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl ++ */ ++#define CUSE_UNRESTRICTED_IOCTL (1 << 0) ++ ++/** ++ * Release flags ++ */ ++#define FUSE_RELEASE_FLUSH (1 << 0) ++#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) ++ ++/** ++ * Getattr flags ++ */ ++#define FUSE_GETATTR_FH (1 << 0) ++ ++/** ++ * Lock flags ++ */ ++#define FUSE_LK_FLOCK (1 << 0) ++ ++/** ++ * WRITE flags ++ * ++ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed ++ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid ++ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits ++ */ ++#define FUSE_WRITE_CACHE (1 << 0) ++#define FUSE_WRITE_LOCKOWNER (1 << 1) ++#define FUSE_WRITE_KILL_PRIV (1 << 2) ++ ++/** ++ * Read flags ++ */ ++#define FUSE_READ_LOCKOWNER (1 << 1) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_32BIT: 32bit ioctl ++ * FUSE_IOCTL_DIR: is a directory ++ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_32BIT (1 << 3) ++#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT_X32 (1 << 5) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Poll flags ++ * ++ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify ++ */ ++#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) ++ ++/** ++ * Fsync flags ++ * ++ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata ++ */ ++#define FUSE_FSYNC_FDATASYNC (1 << 0) ++ ++enum fuse_opcode { ++ FUSE_LOOKUP = 1, ++ FUSE_FORGET = 2, /* no reply */ ++ FUSE_GETATTR = 3, ++ FUSE_SETATTR = 4, ++ FUSE_READLINK = 5, ++ FUSE_SYMLINK = 6, ++ FUSE_MKNOD = 8, ++ FUSE_MKDIR = 9, ++ FUSE_UNLINK = 10, ++ FUSE_RMDIR = 11, ++ FUSE_RENAME = 12, ++ FUSE_LINK = 13, ++ FUSE_OPEN = 14, ++ FUSE_READ = 15, ++ FUSE_WRITE = 16, ++ FUSE_STATFS = 17, ++ FUSE_RELEASE = 18, ++ FUSE_FSYNC = 20, ++ FUSE_SETXATTR = 21, ++ FUSE_GETXATTR = 22, ++ FUSE_LISTXATTR = 23, ++ FUSE_REMOVEXATTR = 24, ++ FUSE_FLUSH = 25, ++ FUSE_INIT = 26, ++ FUSE_OPENDIR = 27, ++ FUSE_READDIR = 28, ++ FUSE_RELEASEDIR = 29, ++ FUSE_FSYNCDIR = 30, ++ FUSE_GETLK = 31, ++ FUSE_SETLK = 32, ++ FUSE_SETLKW = 33, ++ FUSE_ACCESS = 34, ++ FUSE_CREATE = 35, ++ FUSE_INTERRUPT = 36, ++ FUSE_BMAP = 37, ++ FUSE_DESTROY = 38, ++ FUSE_IOCTL = 39, ++ FUSE_POLL = 40, ++ FUSE_NOTIFY_REPLY = 41, ++ FUSE_BATCH_FORGET = 42, ++ FUSE_FALLOCATE = 43, ++ FUSE_READDIRPLUS = 44, ++ FUSE_RENAME2 = 45, ++ FUSE_LSEEK = 46, ++ FUSE_COPY_FILE_RANGE = 47, ++ FUSE_SETUPMAPPING = 48, ++ FUSE_REMOVEMAPPING = 49, ++ ++ /* CUSE specific operations */ ++ CUSE_INIT = 4096, ++ ++ /* Reserved opcodes: helpful to detect structure endian-ness */ ++ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ ++ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ ++}; ++ ++enum fuse_notify_code { ++ FUSE_NOTIFY_POLL = 1, ++ 
FUSE_NOTIFY_INVAL_INODE = 2, ++ FUSE_NOTIFY_INVAL_ENTRY = 3, ++ FUSE_NOTIFY_STORE = 4, ++ FUSE_NOTIFY_RETRIEVE = 5, ++ FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_CODE_MAX, ++}; ++ ++/* The read buffer is required to be at least 8k, but may be much larger */ ++#define FUSE_MIN_READ_BUFFER 8192 ++ ++#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 ++ ++struct fuse_entry_out { ++ uint64_t nodeid; /* Inode ID */ ++ uint64_t generation; /* Inode generation: nodeid:gen must ++ be unique for the fs's lifetime */ ++ uint64_t entry_valid; /* Cache timeout for the name */ ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t entry_valid_nsec; ++ uint32_t attr_valid_nsec; ++ struct fuse_attr attr; ++}; ++ ++struct fuse_forget_in { ++ uint64_t nlookup; ++}; ++ ++struct fuse_forget_one { ++ uint64_t nodeid; ++ uint64_t nlookup; ++}; ++ ++struct fuse_batch_forget_in { ++ uint32_t count; ++ uint32_t dummy; ++}; ++ ++struct fuse_getattr_in { ++ uint32_t getattr_flags; ++ uint32_t dummy; ++ uint64_t fh; ++}; ++ ++#define FUSE_COMPAT_ATTR_OUT_SIZE 96 ++ ++struct fuse_attr_out { ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t attr_valid_nsec; ++ uint32_t dummy; ++ struct fuse_attr attr; ++}; ++ ++#define FUSE_COMPAT_MKNOD_IN_SIZE 8 ++ ++struct fuse_mknod_in { ++ uint32_t mode; ++ uint32_t rdev; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_mkdir_in { ++ uint32_t mode; ++ uint32_t umask; ++}; ++ ++struct fuse_rename_in { ++ uint64_t newdir; ++}; ++ ++struct fuse_rename2_in { ++ uint64_t newdir; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_link_in { ++ uint64_t oldnodeid; ++}; ++ ++struct fuse_setattr_in { ++ uint32_t valid; ++ uint32_t padding; ++ uint64_t fh; ++ uint64_t size; ++ uint64_t lock_owner; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t unused4; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t unused5; 
++}; ++ ++struct fuse_open_in { ++ uint32_t flags; ++ uint32_t unused; ++}; ++ ++struct fuse_create_in { ++ uint32_t flags; ++ uint32_t mode; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_open_out { ++ uint64_t fh; ++ uint32_t open_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_release_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t release_flags; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_flush_in { ++ uint64_t fh; ++ uint32_t unused; ++ uint32_t padding; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_read_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t read_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_WRITE_IN_SIZE 24 ++ ++struct fuse_write_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t write_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_write_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_STATFS_SIZE 48 ++ ++struct fuse_statfs_out { ++ struct fuse_kstatfs st; ++}; ++ ++struct fuse_fsync_in { ++ uint64_t fh; ++ uint32_t fsync_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_setxattr_in { ++ uint32_t size; ++ uint32_t flags; ++}; ++ ++struct fuse_getxattr_in { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_getxattr_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_in { ++ uint64_t fh; ++ uint64_t owner; ++ struct fuse_file_lock lk; ++ uint32_t lk_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_out { ++ struct fuse_file_lock lk; ++}; ++ ++struct fuse_access_in { ++ uint32_t mask; ++ uint32_t padding; ++}; ++ ++struct fuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++}; ++ ++#define FUSE_COMPAT_INIT_OUT_SIZE 8 ++#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 ++ ++struct fuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++ uint16_t 
max_background; ++ uint16_t congestion_threshold; ++ uint32_t max_write; ++ uint32_t time_gran; ++ uint16_t max_pages; ++ uint16_t map_alignment; ++ uint32_t unused[8]; ++}; ++ ++#define CUSE_INIT_INFO_MAX 4096 ++ ++struct cuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++}; ++ ++struct cuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++ uint32_t max_read; ++ uint32_t max_write; ++ uint32_t dev_major; /* chardev major */ ++ uint32_t dev_minor; /* chardev minor */ ++ uint32_t spare[10]; ++}; ++ ++struct fuse_interrupt_in { ++ uint64_t unique; ++}; ++ ++struct fuse_bmap_in { ++ uint64_t block; ++ uint32_t blocksize; ++ uint32_t padding; ++}; ++ ++struct fuse_bmap_out { ++ uint64_t block; ++}; ++ ++struct fuse_ioctl_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t cmd; ++ uint64_t arg; ++ uint32_t in_size; ++ uint32_t out_size; ++}; ++ ++struct fuse_ioctl_iovec { ++ uint64_t base; ++ uint64_t len; ++}; ++ ++struct fuse_ioctl_out { ++ int32_t result; ++ uint32_t flags; ++ uint32_t in_iovs; ++ uint32_t out_iovs; ++}; ++ ++struct fuse_poll_in { ++ uint64_t fh; ++ uint64_t kh; ++ uint32_t flags; ++ uint32_t events; ++}; ++ ++struct fuse_poll_out { ++ uint32_t revents; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_poll_wakeup_out { ++ uint64_t kh; ++}; ++ ++struct fuse_fallocate_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint64_t length; ++ uint32_t mode; ++ uint32_t padding; ++}; ++ ++struct fuse_in_header { ++ uint32_t len; ++ uint32_t opcode; ++ uint64_t unique; ++ uint64_t nodeid; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t pid; ++ uint32_t padding; ++}; ++ ++struct fuse_out_header { ++ uint32_t len; ++ int32_t error; ++ uint64_t unique; ++}; ++ ++struct fuse_dirent { ++ uint64_t ino; ++ uint64_t off; ++ uint32_t namelen; ++ uint32_t type; ++ char name[]; ++}; ++ ++#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) ++#define FUSE_DIRENT_ALIGN(x) \ ++ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) ++#define FUSE_DIRENT_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) ++ ++struct fuse_direntplus { ++ struct fuse_entry_out entry_out; ++ struct fuse_dirent dirent; ++}; ++ ++#define FUSE_NAME_OFFSET_DIRENTPLUS \ ++ offsetof(struct fuse_direntplus, dirent.name) ++#define FUSE_DIRENTPLUS_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) ++ ++struct fuse_notify_inval_inode_out { ++ uint64_t ino; ++ int64_t off; ++ int64_t len; ++}; ++ ++struct fuse_notify_inval_entry_out { ++ uint64_t parent; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_delete_out { ++ uint64_t parent; ++ uint64_t child; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_store_out { ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_retrieve_out { ++ uint64_t notify_unique; ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++/* Matches the size of fuse_write_in */ ++struct fuse_notify_retrieve_in { ++ uint64_t dummy1; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t dummy2; ++ uint64_t dummy3; ++ uint64_t dummy4; ++}; ++ ++/* Device ioctls: */ ++#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) ++ ++struct fuse_lseek_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t whence; ++ uint32_t padding; ++}; ++ ++struct fuse_lseek_out { ++ uint64_t offset; ++}; ++ ++struct fuse_copy_file_range_in { ++ uint64_t fh_in; ++ uint64_t off_in; ++ uint64_t nodeid_out; ++ uint64_t fh_out; ++ uint64_t off_out; ++ uint64_t len; ++ uint64_t flags; ++}; ++ ++#endif /* _LINUX_FUSE_H */ +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f76d773..29c27f4 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" + mkdir -p 
"$output/include/standard-headers/linux" + for i in "$tmpdir"/include/linux/*virtio*.h \ + "$tmpdir/include/linux/qemu_fw_cfg.h" \ ++ "$tmpdir/include/linux/fuse.h" \ + "$tmpdir/include/linux/input.h" \ + "$tmpdir/include/linux/input-event-codes.h" \ + "$tmpdir/include/linux/pci_regs.h" \ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch b/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch new file mode 100644 index 0000000..78784fb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch @@ -0,0 +1,4911 @@ +From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:39 +0100 +Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-5-dgilbert@redhat.com> +Patchwork-id: 93457 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pull in headers from libfuse's upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 823 +++++++++++++ + tools/virtiofsd/fuse_i.h | 139 +++ + tools/virtiofsd/fuse_log.h | 82 ++ + tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 59 + + tools/virtiofsd/fuse_opt.h | 271 +++++ + tools/virtiofsd/passthrough_helpers.h | 76 ++ + 8 files changed, 4814 insertions(+) + create mode 100644 tools/virtiofsd/fuse.h + create mode 100644 tools/virtiofsd/fuse_common.h + create mode 100644 tools/virtiofsd/fuse_i.h + create mode 100644 tools/virtiofsd/fuse_log.h + create mode 100644 tools/virtiofsd/fuse_lowlevel.h + create mode 100644 tools/virtiofsd/fuse_misc.h + create mode 100644 tools/virtiofsd/fuse_opt.h + create mode 100644 tools/virtiofsd/passthrough_helpers.h + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +new file mode 100644 +index 0000000..883f6e5 +--- /dev/null ++++ b/tools/virtiofsd/fuse.h +@@ -0,0 +1,1275 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_H_ ++#define FUSE_H_ ++ ++/** @file ++ * ++ * This file defines the library interface of FUSE ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. ++ */ ++ ++#include "fuse_common.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Basic FUSE API * ++ * ----------------------------------------------------------- */ ++ ++/** Handle for a FUSE filesystem */ ++struct fuse; ++ ++/** ++ * Readdir flags, passed to ->readdir() ++ */ ++enum fuse_readdir_flags { ++ /** ++ * "Plus" mode. 
++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), ++}; ++ ++enum fuse_fill_dir_flags { ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. ++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), ++}; ++ ++/** Function to add an entry in a readdir() operation ++ * ++ * The *off* parameter can be any non-zero value that enables the ++ * filesystem to identify the current point in the directory ++ * stream. It does not need to be the actual physical position. A ++ * value of zero is reserved to indicate that seeking in directories ++ * is not supported. ++ * ++ * @param buf the buffer passed to the readdir() operation ++ * @param name the file name of the directory entry ++ * @param stat file attributes, can be NULL ++ * @param off offset of the next entry or zero ++ * @param flags fill flags ++ * @return 1 if buffer is full, zero otherwise ++ */ ++typedef int (*fuse_fill_dir_t) (void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); ++/** ++ * Configuration of the high-level API ++ * ++ * This structure is initialized from the arguments passed to ++ * fuse_new(), and then passed to the file system's init() handler ++ * which should ensure that the configuration is compatible with the ++ * file system implementation. ++ */ ++struct fuse_config { ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, the any permissions bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means, that if file did not exist (lookup ++ * retuned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released. 
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several affects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it). 
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched.
++ */ ++ int show_help; ++ char *modules; ++ int debug; ++}; ++ ++ ++/** ++ * The file system operations: ++ * ++ * Most of these should work very similarly to the well known UNIX ++ * file system operations. A major exception is that instead of ++ * returning an error in 'errno', the operation should return the ++ * negated error value (-errno) directly. ++ * ++ * All methods are optional, but some are essential for a useful ++ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, ++ * releasedir, fsyncdir, access, create, truncate, lock, init and ++ * destroy are special purpose methods, without which a full featured ++ * filesystem can still be implemented. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * Almost all operations take a path which can be of any length. ++ */ ++struct fuse_operations { ++ /** Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. 
++ */ ++ int (*readlink) (const char *, char *, size_t); ++ ++ /** Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod) (const char *, mode_t, dev_t); ++ ++ /** Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ * */ ++ int (*mkdir) (const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink) (const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir) (const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink) (const char *, const char *); ++ ++ /** Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename) (const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link) (const char *, const char *); ++ ++ /** Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits.
++ */ ++ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); ++ ++ /** Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh.
++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in <fuse_common.h> for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being sent ++ * to the filesystem process. ++ * ++ */ ++ int (*open) (const char *, struct fuse_file_info *); ++ ++ /** Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read) (const char *, char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write) (const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs) (const char *, struct statvfs *); ++ ++ /** Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file.
Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush) (const char *, struct fuse_file_info *); ++ ++ /** Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync) (const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr) (const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr) (const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr) (const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr) (const char *, const char *); ++ ++ /** Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir) (const char *, struct fuse_file_info *); ++ ++ /** Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** Release directory ++ */ ++ int (*releasedir) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir) (const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will be passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init) (struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy) (void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access) (const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create) (const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used.
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures, that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock) (const char *, struct fuse_file_info *, int cmd, ++ struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens) (const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd.
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl) (const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll) (const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /** Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /** Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf) (const char *, struct fuse_bufvec **bufp, ++ size_t size, off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock) (const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate) (const char *, int, off_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range) (const char *path_in, ++ struct fuse_file_info *fi_in, ++ off_t offset_in, const char *path_out, ++ struct fuse_file_info *fi_out, ++ off_t offset_out, size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++}; ++ ++/** Extra context that may be needed by some filesystems ++ * ++ * The uid, gid and pid fields are not filled in case of a writepage ++ * operation. ++ */ ++struct fuse_context { ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; ++ ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Process ID of the calling thread */ ++ pid_t pid; ++ ++ /** Private filesystem data */ ++ void *private_data; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++/** ++ * Main function of FUSE. ++ * ++ * This is for the lazy. This is all that has to be called from the ++ * main() function. ++ * ++ * This function does the following: ++ * - parses command line options, and handles --help and ++ * --version ++ * - installs signal handlers for INT, HUP, TERM and PIPE ++ * - registers an exit handler to unmount the filesystem on program exit ++ * - creates a fuse handle ++ * - registers the operations ++ * - calls either the single-threaded or the multi-threaded event loop ++ * ++ * Most file systems will have to parse some file-system specific ++ * arguments before calling this function. It is recommended to do ++ * this with fuse_opt_parse() and a processing function that passes ++ * through any unknown options (this can also be achieved by just ++ * passing NULL as the processing function). That way, the remaining ++ * options can be passed directly to fuse_main(). ++ * ++ * fuse_main() accepts all options that can be passed to ++ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help ++ * output. argv[0] may also be set to the empty string. In this case ++ * the usage message is suppressed. This can be used by file systems ++ * to print their own usage line first. See hello.c for an example of ++ * how to do this. ++ * ++ * Note: this is currently implemented as a macro. ++ * ++ * The following error codes may be returned from fuse_main(): ++ * 1: Invalid option arguments ++ * 2: No mount point specified ++ * 3: FUSE setup failed ++ * 4: Mounting failed ++ * 5: Failed to daemonize (detach from session) ++ * 6: Failed to set up signal handlers ++ * 7: An error occurred during the life of the file system ++ * ++ * @param argc the argument counter passed to the main() function ++ * @param argv the argument vector passed to the main() function ++ * @param op the file system operation ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return 0 on success, nonzero on failure ++ * ++ * Example usage, see hello.c ++ */ ++/* ++ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ void *private_data); ++*/ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ ++/* ----------------------------------------------------------- * ++ * More detailed API * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print available options (high- and low-level) to stdout. This is ++ * not an exhaustive list, but includes only those options that may be ++ * of interest to an end-user of a file system.
++ * ++ * The function looks at the argument vector only to determine if ++ * there are additional modules to be loaded (module=foo option), ++ * and attempts to call their help functions as well. ++ * ++ * @param args the argument vector. ++ */ ++void fuse_lib_help(struct fuse_args *args); ++ ++/** ++ * Create a new FUSE filesystem. ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the ++ * FUSE-specific mount options from mount.fuse(8). ++ * ++ * If the --help option is specified, the function writes a help text ++ * to stdout and returns NULL. ++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help output. If ++ * argv[0] is set to the empty string, no usage message is included in ++ * the --help output. ++ * ++ * If an unknown option is passed in, an error message is written to ++ * stderr and the function returns NULL. ++ * ++ * @param args argument vector ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return the created FUSE handle ++ */ ++#if FUSE_USE_VERSION == 30 ++struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) ++#else ++struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#endif ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param f the FUSE handle ++ * ++ * @return 0 on success, -1 on failure. 
++ **/ ++int fuse_mount(struct fuse *f, const char *mountpoint); ++ ++/** ++ * Unmount a FUSE file system. ++ * ++ * See fuse_session_unmount() for additional information. ++ * ++ * @param f the FUSE handle ++ **/ ++void fuse_unmount(struct fuse *f); ++ ++/** ++ * Destroy the FUSE handle. ++ * ++ * NOTE: This function does not unmount the filesystem. If this is ++ * needed, call fuse_unmount() before calling this function. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_destroy(struct fuse *f); ++ ++/** ++ * FUSE event loop. ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param f the FUSE handle ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop_mt() ++ */ ++int fuse_loop(struct fuse *f); ++ ++/** ++ * Flag session as terminated ++ * ++ * This function will cause any running event loops to exit on ++ * the next opportunity. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_exit(struct fuse *f); ++ ++/** ++ * FUSE event loop with multiple threads ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. Requests are processed in parallel by ++ * distributing them between multiple threads. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in ++ * single-threaded mode, and that you will not have to worry about reentrancy, ++ * though you will have to worry about recursive lookups. In single-threaded ++ * mode, FUSE will wait for one callback to return before calling another.
++ * ++ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make ++ * multiple simultaneous calls into the various callback functions given by your ++ * fuse_operations record. ++ * ++ * If you are using multiple threads, you can enjoy all the parallel execution ++ * and interactive response benefits of threads, and you get to enjoy all the ++ * benefits of race conditions and locking bugs, too. Ensure that any code used ++ * in the callback function of fuse_operations is also thread-safe. ++ * ++ * @param f the FUSE handle ++ * @param config loop configuration ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_loop_mt_31(struct fuse *f, int clone_fd); ++#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) ++#else ++int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Get the current context ++ * ++ * The context is only valid for the duration of a filesystem ++ * operation, and thus must not be stored and used later. ++ * ++ * @return the context ++ */ ++struct fuse_context *fuse_get_context(void); ++ ++/** ++ * Get the current supplementary group IDs for the current request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_getgroups(int size, gid_t list[]); ++ ++/** ++ * Check if the current request has already been interrupted ++ * ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_interrupted(void); ++ ++/** ++ * Invalidates cache for the given path. ++ * ++ * This calls fuse_lowlevel_notify_inval_inode internally. ++ * ++ * @return 0 on successful invalidation, negative error value otherwise. ++ * This routine may return -ENOENT to indicate that there was ++ * no entry to be invalidated, e.g., because the path has not ++ * been seen before or has been forgotten; this should not be ++ * considered to be an error. ++ */ ++int fuse_invalidate_path(struct fuse *f, const char *path); ++ ++/** ++ * The real main function ++ * ++ * Do not call this directly, use fuse_main() ++ */ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++ ++/** ++ * Start the cleanup thread when using option "remember". ++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ * @return 0 on success and -1 on error ++ */ ++int fuse_start_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Stop the cleanup thread when using option "remember". 
++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ */ ++void fuse_stop_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Iterate over cache removing stale entries ++ * use in conjunction with "-oremember" ++ * ++ * NOTE: This is already done for the standard sessions ++ * ++ * @param fuse struct fuse pointer for fuse instance ++ * @return the number of seconds until the next cleanup ++ */ ++int fuse_clean_cache(struct fuse *fuse); ++ ++/* ++ * Stacking API ++ */ ++ ++/** ++ * Fuse filesystem object ++ * ++ * This opaque object represents a filesystem layer ++ */ ++struct fuse_fs; ++ ++/* ++ * These functions call the relevant filesystem operation, and return ++ * the result. ++ * ++ * If the operation is not defined, they return -ENOSYS, with the ++ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, ++ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. ++ */ ++ ++int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, ++ const char *newpath, unsigned int flags); ++int fuse_fs_unlink(struct fuse_fs *fs, const char *path); ++int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, ++ const char *path); ++int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_open(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, ++ size_t size,
off_t off, struct fuse_file_info *fi); ++int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_flush(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); ++int fuse_fs_opendir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_lock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int cmd, struct flock *lock); ++int fuse_fs_flock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int op); ++int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, ++ struct fuse_file_info *fi); ++int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, ++ struct fuse_file_info *fi); ++int fuse_fs_utimens(struct fuse_fs *fs, const char *path, ++ const struct timespec tv[2], struct fuse_file_info *fi); ++int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); ++int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, ++ size_t len); ++int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, ++ dev_t rdev); ++int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); ++int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, ++ const char *value, size_t size, int flags); ++int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, ++ char *value, size_t size); ++int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, ++ const char *name); ++int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, ++ uint64_t *idx); ++int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); ++int fuse_fs_poll(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); ++int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); ++off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, ++ struct fuse_file_info *fi); ++void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++void fuse_fs_destroy(struct fuse_fs *fs); ++ ++int fuse_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Create a new fuse filesystem object ++ * ++ * This is usually called from the factory of a fuse module to create ++ * a new instance of a filesystem. ++ * ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. 
++ * @return a new filesystem object ++ */ ++struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, ++ void *private_data); ++ ++/** ++ * Factory for creating filesystem objects ++ * ++ * The function may use and remove options from 'args' that belong ++ * to this module. ++ * ++ * For now the 'fs' vector always contains exactly one filesystem. ++ * This is the filesystem which will be below the newly created ++ * filesystem in the stack. ++ * ++ * @param args the command line arguments ++ * @param fs NULL terminated filesystem object vector ++ * @return the new filesystem object ++ */ ++typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, ++ struct fuse_fs *fs[]); ++/** ++ * Register filesystem module ++ * ++ * If the "-omodules=*name*_:..." option is present, filesystem ++ * objects are created and pushed onto the stack with the *factory_* ++ * function. ++ * ++ * @param name_ the name of this filesystem module ++ * @param factory_ the factory function for this filesystem module ++ */ ++#define FUSE_REGISTER_MODULE(name_, factory_) \ ++ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ ++/** Get session from fuse object */ ++struct fuse_session *fuse_get_session(struct fuse *f); ++ ++/** ++ * Open a FUSE file descriptor and set up the mount for the given ++ * mountpoint and flags. 
++ * ++ * @param mountpoint reference to the mount in the file system ++ * @param options mount options ++ * @return the FUSE file descriptor or -1 upon error ++ */ ++int fuse_open_channel(const char *mountpoint, const char *options); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +new file mode 100644 +index 0000000..2d686b2 +--- /dev/null ++++ b/tools/virtiofsd/fuse_common.h +@@ -0,0 +1,823 @@ ++/* FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++/** @file */ ++ ++#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) ++#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." ++#endif ++ ++#ifndef FUSE_COMMON_H_ ++#define FUSE_COMMON_H_ ++ ++#include "fuse_opt.h" ++#include "fuse_log.h" ++#include <stdint.h> ++#include <sys/types.h> ++ ++/** Major version of FUSE library interface */ ++#define FUSE_MAJOR_VERSION 3 ++ ++/** Minor version of FUSE library interface */ ++#define FUSE_MINOR_VERSION 2 ++ ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Information about an open file. ++ * ++ * File Handles are created by the open, opendir, and create methods and closed ++ * by the release and releasedir methods. Multiple file handles may be ++ * concurrently open for the same file. Generally, a client will create one ++ * file handle per file descriptor, though in some cases multiple file ++ * descriptors can share a single file handle. ++ */ ++struct fuse_file_info { ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /** In case of a write operation indicates if this was caused ++ by a delayed write from the page cache.
If so, then the ++ context's pid, uid, and gid fields will not be valid, and ++ the *fh* value may not match the *fh* value that would ++ have been sent with the corresponding individual write ++ requests if write caching had been disabled. */ ++ unsigned int writepage : 1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io : 1; ++ ++ /** Can be filled in by open. It signals the kernel that any ++ currently cached file data (ie., data that the filesystem ++ provided the last time the file was open) need not be ++ invalidated. Has no effect when set in other contexts (in ++ particular it does nothing when set by opendir()). */ ++ unsigned int keep_cache : 1; ++ ++ /** Indicates a flush operation. Set in flush operation, also ++ maybe set in highlevel lock operation and lowlevel release ++ operation. */ ++ unsigned int flush : 1; ++ ++ /** Can be filled in by open, to indicate that the file is not ++ seekable. */ ++ unsigned int nonseekable : 1; ++ ++ /* Indicates that flock locks for this file should be ++ released. If set, lock_owner shall contain a valid value. ++ May only be set in ->release(). */ ++ unsigned int flock_release : 1; ++ ++ /** Can be filled in by opendir. It signals the kernel to ++ enable caching of entries returned by readdir(). Has no ++ effect when set in other contexts (in particular it does ++ nothing when set by open()). */ ++ unsigned int cache_readdir : 1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding : 25; ++ unsigned int padding2 : 32; ++ ++ /** File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /** Requested poll events. Available in ->poll. Only set on kernels ++ which support it. If unsupported, this field is set to zero. 
*/ ++ uint32_t poll_events; ++}; ++ ++/** ++ * Configuration parameters passed to fuse_session_loop_mt() and ++ * fuse_loop_mt(). ++ */ ++struct fuse_loop_config { ++ /** ++ * whether to use separate device fds for each thread ++ * (may increase performance) ++ */ ++ int clone_fd; ++ ++ /** ++ * The maximum number of available worker threads before they ++ * start to get deleted when they become idle. If not ++ * specified, the default is 10. ++ * ++ * Adjusting this has performance implications; a very small number ++ * of threads in the pool will cause a lot of thread creation and ++ * deletion overhead and performance may suffer. When set to 0, a new ++ * thread will be created to service every operation. ++ */ ++ unsigned int max_idle_threads; ++}; ++ ++/************************************************************************** ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * ++ **************************************************************************/ ++ ++/** ++ * Indicates that the filesystem supports asynchronous read requests. ++ * ++ * If this capability is not requested/available, the kernel will ++ * ensure that there is at most one pending read request per ++ * file-handle at any time, and will attempt to order read requests by ++ * increasing offset. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_READ (1 << 0) ++ ++/** ++ * Indicates that the filesystem supports "remote" locking. ++ * ++ * This feature is enabled by default when supported by the kernel, ++ * and if getlk() and setlk() handlers are implemented. ++ */ ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++ ++/** ++ * Indicates that the filesystem supports the O_TRUNC open flag. If ++ * disabled, and an application specifies O_TRUNC, fuse first calls ++ * truncate() and then open() with O_TRUNC filtered out. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++ ++/** ++ * Indicates that the filesystem supports lookups of "." and "..". ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++ ++/** ++ * Indicates that the kernel should not apply the umask to the ++ * file mode on create operations. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_DONT_MASK (1 << 6) ++ ++/** ++ * Indicates that libfuse should try to use splice() when writing to ++ * the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++ ++/** ++ * Indicates that libfuse should try to move pages instead of copying when ++ * writing to / reading from the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++ ++/** ++ * Indicates that libfuse should try to use splice() when reading from ++ * the fuse device. This may improve performance. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a write_buf() handler. ++ */ ++#define FUSE_CAP_SPLICE_READ (1 << 9) ++ ++/** ++ * If set, the calls to flock(2) will be emulated using POSIX locks and must ++ * then be handled by the filesystem's setlock() handler. ++ * ++ * If not set, flock(2) calls will be handled by the FUSE kernel module ++ * internally (so any access that does not go through the kernel cannot be taken ++ * into account). ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a flock() handler. ++ */ ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++ ++/** ++ * Indicates that the filesystem supports ioctl's on directories. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_IOCTL_DIR (1 << 11) ++ ++/** ++ * Traditionally, while a file is open the FUSE kernel module only ++ * asks the filesystem for an update of the file's attributes when a ++ * client attempts to read beyond EOF. This is unsuitable for ++ * e.g. network filesystems, where the file contents may change ++ * without the kernel knowing about it. ++ * ++ * If this flag is set, FUSE will check the validity of the attributes ++ * on every read. If the attributes are no longer valid (i.e., if the ++ * *attr_timeout* passed to fuse_reply_attr() or set in `struct ++ * fuse_entry_param` has passed), it will first issue a `getattr` ++ * request. If the new mtime differs from the previous value, any ++ * cached file *contents* will be invalidated as well. ++ * ++ * This flag should always be set when available. If all file changes ++ * go through the kernel, *attr_timeout* should be set to a very large ++ * number to avoid unnecessary getattr() calls. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++ ++/** ++ * Indicates that the filesystem supports readdirplus. ++ * ++ * This feature is enabled by default when supported by the kernel and if the ++ * filesystem implements a readdirplus() handler. ++ */ ++#define FUSE_CAP_READDIRPLUS (1 << 13) ++ ++/** ++ * Indicates that the filesystem supports adaptive readdirplus. ++ * ++ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel ++ * will always issue readdirplus() requests to retrieve directory ++ * contents. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel ++ * will issue both readdir() and readdirplus() requests, depending on ++ * how much information is expected to be required. 
++ * ++ * As of Linux 4.20, the algorithm is as follows: when userspace ++ * starts to read directory entries, issue a READDIRPLUS request to ++ * the filesystem. If any entry attributes have been looked up by the ++ * time userspace requests the next batch of entries continue with ++ * READDIRPLUS, otherwise switch to plain READDIR. This will result ++ * in eg plain "ls" triggering READDIRPLUS first then READDIR after ++ * that because it doesn't do lookups. "ls -l" should result in all ++ * READDIRPLUS, except if dentries are already cached. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements both a readdirplus() and a readdir() ++ * handler. ++ */ ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++ ++/** ++ * Indicates that the filesystem supports asynchronous direct I/O submission. ++ * ++ * If this capability is not requested/available, the kernel will ensure that ++ * there is at most one pending read and one pending write request per direct ++ * I/O file-handle at any time. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_DIO (1 << 15) ++ ++/** ++ * Indicates that writeback caching should be enabled. This means that ++ * individual write requests may be buffered and merged in the kernel ++ * before they are sent to the filesystem. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++ ++/** ++ * Indicates support for zero-message opens. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the ++ * filesystem may return `ENOSYS` from the open() handler to indicate ++ * success. Further attempts to open files will be handled in the ++ * kernel. (If this flag is not set, returning ENOSYS will be treated ++ * as an error and signaled to the caller). ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no ++ * effect*.
++ */ ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++ ++/** ++ * Indicates support for parallel directory operations. If this flag ++ * is unset, the FUSE kernel module will ensure that lookup() and ++ * readdir() requests are never issued concurrently for the same ++ * directory. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++ ++/** ++ * Indicates support for POSIX ACLs. ++ * ++ * If this feature is enabled, the kernel will cache and have ++ * responsibility for enforcing ACLs. ACL will be stored as xattrs and ++ * passed to userspace, which is responsible for updating the ACLs in ++ * the filesystem, keeping the file mode in sync with the ACL, and ++ * ensuring inheritance of default ACLs when new filesystem nodes are ++ * created. Note that this requires that the file system is able to ++ * parse and interpret the xattr representation of ACLs. ++ * ++ * Enabling this feature implicitly turns on the ++ * ``default_permissions`` mount option (even if it was not passed to ++ * mount(2)). ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_POSIX_ACL (1 << 19) ++ ++/** ++ * Indicates that the filesystem is responsible for unsetting ++ * setuid and setgid bits when a file is written, truncated, or ++ * its owner is changed. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++ ++/** ++ * Indicates support for zero-message opendirs. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the filesystem ++ * may return `ENOSYS` from the opendir() handler to indicate success. Further ++ * opendir and releasedir messages will be handled in the kernel. (If this ++ * flag is not set, returning ENOSYS will be treated as an error and signalled ++ * to the caller.) ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
++ */ ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_DIR: is a directory ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Connection information, passed to the ->init() method ++ * ++ * Some of the elements are read-write, these can be changed to ++ * indicate the value requested by the filesystem. The requested ++ * value must usually be smaller than the indicated value. ++ */ ++struct fuse_conn_info { ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. 
++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; ++}; ++ ++struct fuse_session; ++struct fuse_pollhandle; ++struct fuse_conn_info_opts; ++ ++/** ++ * This function parses several command-line options that can be used ++ * to override elements of struct fuse_conn_info. The pointer returned ++ * by this function should be passed to the ++ * fuse_apply_conn_info_opts() method by the file system's init() ++ * handler. ++ * ++ * Before using this function, think twice if you really want these ++ * parameters to be adjustable from the command line. In most cases, ++ * they should be determined by the file system internally. 
++ * ++ * The following options are recognized: ++ * ++ * -o max_write=N sets conn->max_write ++ * -o max_readahead=N sets conn->max_readahead ++ * -o max_background=N sets conn->max_background ++ * -o congestion_threshold=N sets conn->congestion_threshold ++ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want ++ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want ++ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want ++ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock ++ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want ++ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want ++ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want ++ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want ++ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want ++ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want ++ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want ++ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want ++ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want ++ * -o time_gran=N sets conn->time_gran ++ * ++ * Known options will be removed from *args*, unknown options will be ++ * passed through unchanged. ++ * ++ * @param args argument vector (input+output) ++ * @return parsed options ++ **/ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++ ++/** ++ * This function applies the (parsed) parameters in *opts* to the ++ * *conn* pointer. It may modify the following fields: want, ++ * max_write, max_readahead, congestion_threshold, max_background, ++ * time_gran. A field is only set (or unset) if the corresponding ++ * option has been explicitly set.
++ */ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn); ++ ++/** ++ * Go into the background ++ * ++ * @param foreground if true, stay in the foreground ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_daemonize(int foreground); ++ ++/** ++ * Get the version of the library ++ * ++ * @return the version ++ */ ++int fuse_version(void); ++ ++/** ++ * Get the full package version string of the library ++ * ++ * @return the package version ++ */ ++const char *fuse_pkgversion(void); ++ ++/** ++ * Destroy poll handle ++ * ++ * @param ph the poll handle ++ */ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); ++ ++/* ----------------------------------------------------------- * ++ * Data buffer * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Buffer flags ++ */ ++enum fuse_buf_flags { ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem field is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. ++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), ++}; ++ ++/** ++ * Buffer copy flags ++ */ ++enum fuse_buf_copy_flags { ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable.
++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. ++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++}; ++ ++/** ++ * Single data buffer ++ * ++ * Generic data buffer for I/O, extended attributes, etc... Data may ++ * be supplied as a memory pointer or as a file descriptor ++ */ ++struct fuse_buf { ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; ++}; ++ ++/** ++ * Data buffer vector ++ * ++ * An array of data buffers, each containing a memory pointer or a ++ * file descriptor. ++ * ++ * Allocate dynamically to add more than one buffer. 
++ */ ++struct fuse_bufvec { ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; ++}; ++ ++/* Initialize bufvec with a single buffer of given size */ ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec) { \ ++ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, \ ++ /* .buf = */ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags) 0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } \ ++ } ) ++ ++/** ++ * Get total size of data in a fuse buffer vector ++ * ++ * @param bufv buffer vector ++ * @return size of data ++ */ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv); ++ ++/** ++ * Copy data from one buffer vector to another ++ * ++ * @param dst destination buffer vector ++ * @param src source buffer vector ++ * @param flags flags controlling the copy ++ * @return actual number of bytes copied or -errno on error ++ */ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, ++ enum fuse_buf_copy_flags flags); ++ ++/* ----------------------------------------------------------- * ++ * Signal handling * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Exit session on HUP, TERM and INT signals and ignore PIPE signal ++ * ++ * Stores session in a global variable. May only be called once per ++ * process until fuse_remove_signal_handlers() is called. ++ * ++ * Once either of the POSIX signals arrives, the signal handler calls ++ * fuse_session_exit(). 
++ * ++ * @param se the session to exit ++ * @return 0 on success, -1 on failure ++ * ++ * See also: ++ * fuse_remove_signal_handlers() ++ */ ++int fuse_set_signal_handlers(struct fuse_session *se); ++ ++/** ++ * Restore default signal handlers ++ * ++ * Resets global session. After this fuse_set_signal_handlers() may ++ * be called again. ++ * ++ * @param se the same session as given in fuse_set_signal_handlers() ++ * ++ * See also: ++ * fuse_set_signal_handlers() ++ */ ++void fuse_remove_signal_handlers(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Compatibility stuff * ++ * ----------------------------------------------------------- */ ++ ++#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 ++# error only API version 30 or greater is supported ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++ ++/* ++ * This interface uses 64 bit off_t. ++ * ++ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! ++ */ ++ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); ++#else ++struct _fuse_off_t_must_be_64bit_dummy_struct \ ++ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++#endif ++ ++#endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +new file mode 100644 +index 0000000..d38b630 +--- /dev/null ++++ b/tools/virtiofsd/fuse_i.h +@@ -0,0 +1,139 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "fuse.h" ++#include "fuse_lowlevel.h" ++ ++struct mount_opts; ++ ++struct fuse_req { ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit : 1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; ++}; ++ ++struct fuse_notify_req { ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; ++}; ++ ++struct fuse_session { ++ char *mountpoint; ++ volatile int exited; ++ int fd; ++ struct mount_opts *mo; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ pthread_key_t pipe_key; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; ++}; ++ ++struct fuse_chan { ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; ++}; ++ ++/** ++ * Filesystem module ++ * ++ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() ++ * macro. 
++ * ++ */ ++struct fuse_module { ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; ++}; ++ ++/* ----------------------------------------------------------- * ++ * Channel interface (when using -o clone_fd) * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Obtain counted reference to the channel ++ * ++ * @param ch the channel ++ * @return the channel ++ */ ++struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); ++ ++/** ++ * Drop counted reference to a channel ++ * ++ * @param ch the channel ++ */ ++void fuse_chan_put(struct fuse_chan *ch); ++ ++struct mount_opts *parse_mount_opts(struct fuse_args *args); ++void destroy_mount_opts(struct mount_opts *mo); ++void fuse_mount_version(void); ++unsigned get_max_read(struct mount_opts *o); ++void fuse_kern_unmount(const char *mountpoint, int fd); ++int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count); ++void fuse_free_req(fuse_req_t req); ++ ++void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); ++ ++int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch); ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch); ++ ++struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); ++int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); ++ ++#define FUSE_MAX_MAX_PAGES 256 ++#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 ++ ++/* room needed in buffer to accommodate header */ ++#define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ +diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +new file mode 100644 +index 0000000..5e112e0 +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.h +@@ -0,0 +1,82 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOG_H_ ++#define FUSE_LOG_H_ ++ ++/** @file ++ * ++ * This file defines the logging interface of FUSE ++ */ ++ ++#include <stdarg.h> ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Log severity level ++ * ++ * These levels correspond to syslog(2) log levels since they are widely used. ++ */ ++enum fuse_log_level { ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG ++}; ++ ++/** ++ * Log message handler function. ++ * ++ * This function must be thread-safe. It may be called from any libfuse ++ * function, including fuse_parse_cmdline() and other functions invoked before ++ * a FUSE filesystem is created. ++ * ++ * Install a custom log message handler function using fuse_set_log_func(). ++ * ++ * @param level log severity level ++ * @param fmt sprintf-style format string including newline ++ * @param ap format string arguments ++ */ ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, ++ const char *fmt, va_list ap); ++ ++/** ++ * Install a custom log handler function. ++ * ++ * Log messages are emitted by libfuse functions to report errors and debug ++ * information. Messages are printed to stderr by default but this can be ++ * overridden by installing a custom log message handler function. ++ * ++ * The log message handler function is global and affects all FUSE filesystems ++ * created within this process.
++ * ++ * @param func a custom log message handler function or NULL to revert to ++ * the default ++ */ ++void fuse_set_log_func(fuse_log_func_t func); ++ ++/** ++ * Emit a log message ++ * ++ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) ++ * @param fmt sprintf-style format string including newline ++ */ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +new file mode 100644 +index 0000000..18c6363 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -0,0 +1,2089 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOWLEVEL_H_ ++#define FUSE_LOWLEVEL_H_ ++ ++/** @file ++ * ++ * Low level API ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this ++ * header. To use the newest API define it to 31 (recommended for any ++ * new application). 
++ */ ++ ++#ifndef FUSE_USE_VERSION ++#error FUSE_USE_VERSION not defined ++#endif ++ ++#include "fuse_common.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Miscellaneous definitions * ++ * ----------------------------------------------------------- */ ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/** Inode number type */ ++typedef uint64_t fuse_ino_t; ++ ++/** Request pointer type */ ++typedef struct fuse_req *fuse_req_t; ++ ++/** ++ * Session ++ * ++ * This provides hooks for processing requests, and exiting ++ */ ++struct fuse_session; ++ ++/** Directory entry parameters supplied to fuse_reply_entry() */ ++struct fuse_entry_param { ++ /** Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** Validity timeout (in seconds) for inode attributes. If ++ attributes only change as a result of requests that come ++ through the kernel, this should be set to a very large ++ value. 
*/ ++ double attr_timeout; ++ ++ /** Validity timeout (in seconds) for the name. If directory ++ entries are changed/deleted only as a result of requests ++ that come through the kernel, this should be set to a very ++ large value. */ ++ double entry_timeout; ++}; ++ ++/** ++ * Additional context associated with requests. ++ * ++ * Note that the reported client uid, gid and pid may be zero in some ++ * situations. For example, if the FUSE file system is running in a ++ * PID or user namespace but then accessed from outside the namespace, ++ * there is no valid uid/pid/gid that could be reported. ++ */ ++struct fuse_ctx { ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Thread ID of the calling process */ ++ pid_t pid; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++struct fuse_forget_data { ++ fuse_ino_t ino; ++ uint64_t nlookup; ++}; ++ ++/* 'to_set' flags in setattr */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ----------------------------------------------------------- * ++ * Request methods and replies * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Low level filesystem operations ++ * ++ * Most of the methods (with the exception of init and destroy) ++ * receive a request handle (fuse_req_t) as their first argument. ++ * This handle must be passed to one of the specified reply functions. ++ * ++ * This may be done inside the method invocation, or after the call ++ * has returned. The request handle is valid until one of the reply ++ * functions is called. 
++ * ++ * Other pointer arguments (name, fuse_file_info, etc) are not valid ++ * after the call has returned, so if they are needed later, their ++ * contents have to be copied. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_session_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * The filesystem sometimes needs to handle a return value of -ENOENT ++ * from the reply function, which means, that the request was ++ * interrupted, and the reply discarded. For example if ++ * fuse_reply_open() return -ENOENT means, that the release method for ++ * this file will not be called. ++ */ ++struct fuse_lowlevel_ops { ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init) (void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy) (void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive request from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inodes lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes. 
++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem). ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL.
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future rename requests will fail with EINVAL without being ++ * sent to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e.
both must ++ * exist and neither may be deleted. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in <fuse_common.h> for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, ++ size_t size, off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that it will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes.
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle.
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, the filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note: the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned flags, ++ const void *in_buf, size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, a single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to poll() will succeed the same way without being sent ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in a pipe for supporting zero ++ * copy data transfer. ++ * ++ * buf->count is guaranteed to be one (and thus buf->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. 
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *bufv, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi) (fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. 
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination, effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number of the source file ++ * @param off_in starting point from where the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number of the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, ++ off_t off_in, struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); ++}; ++ ++/** ++ * Reply with an error code or success. 
++ * ++ * Possible requests: ++ * all except forget ++ * ++ * Wherever possible, error codes should be chosen from the list of ++ * documented error conditions in the corresponding system call's ++ * manpage. ++ * ++ * An error code of ENOSYS is sometimes treated specially. This is ++ * indicated in the documentation of the affected handler functions. ++ * ++ * The following requests may be answered with a zero error code: ++ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, ++ * removexattr, setlk. ++ * ++ * @param req request handle ++ * @param err the positive error value, or zero for success ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_err(fuse_req_t req, int err); ++ ++/** ++ * Don't send reply ++ * ++ * Possible requests: ++ * forget ++ * forget_multi ++ * retrieve_reply ++ * ++ * @param req request handle ++ */ ++void fuse_reply_none(fuse_req_t req); ++ ++/** ++ * Reply with a directory entry ++ * ++ * Possible requests: ++ * lookup, mknod, mkdir, symlink, link ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); ++ ++/** ++ * Reply with a directory entry and open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * create ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *fi); ++ ++/** ++ * Reply with attributes ++ * ++ * Possible requests: ++ * getattr, setattr ++ * ++ * @param req request handle ++ 
* @param attr the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout); ++ ++/** ++ * Reply with the contents of a symbolic link ++ * ++ * Possible requests: ++ * readlink ++ * ++ * @param req request handle ++ * @param link symbolic link contents ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_readlink(fuse_req_t req, const char *link); ++ ++/** ++ * Reply with open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * open, opendir ++ * ++ * @param req request handle ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); ++ ++/** ++ * Reply with number of bytes written ++ * ++ * Possible requests: ++ * write ++ * ++ * @param req request handle ++ * @param count the number of bytes written ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_write(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with data ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param buf buffer containing data ++ * @param size the size of data in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); ++ ++/** ++ * Reply with data copied/moved from buffer(s) ++ * ++ * Zero copy data transfer ("splicing") will be used under ++ * the following circumstances: ++ * ++ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and ++ * 2. the kernel supports splicing from the fuse device ++ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and ++ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE ++ * 4. The amount of data that is provided in file-descriptor backed ++ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) ++ * is at least twice the page size. ++ * ++ * In order for SPLICE_F_MOVE to be used, the following additional ++ * conditions have to be fulfilled: ++ * ++ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and ++ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in ++ fuse_conn_info.capable), and ++ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE ++ * ++ * Note that, if splice is used, the data is actually spliced twice: ++ * once into a temporary pipe (to prepend header data), and then again ++ * into the kernel. If some of the provided buffers are memory-backed, ++ * the data in them is copied in step one and spliced in step two. ++ * ++ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags ++ * are silently ignored. ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * Side effects: ++ * when used to return data from a readdirplus() (but not readdir()) ++ * call, increments the lookup count of each returned entry by one ++ * on success. 
++ * ++ * @param req request handle ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++ ++/** ++ * Reply with data vector ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); ++ ++/** ++ * Reply with filesystem statistics ++ * ++ * Possible requests: ++ * statfs ++ * ++ * @param req request handle ++ * @param stbuf filesystem statistics ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); ++ ++/** ++ * Reply with needed buffer size ++ * ++ * Possible requests: ++ * getxattr, listxattr ++ * ++ * @param req request handle ++ * @param count the buffer size needed in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_xattr(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with file lock information ++ * ++ * Possible requests: ++ * getlk ++ * ++ * @param req request handle ++ * @param lock the lock information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock); ++ ++/** ++ * Reply with block index ++ * ++ * Possible requests: ++ * bmap ++ * ++ * @param req request handle ++ * @param idx block index within device ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx); ++ ++/* ----------------------------------------------------------- * ++ * Filling a buffer in readdir * ++ * 
----------------------------------------------------------- */ ++ ++/** ++ * Add a directory entry to the buffer ++ * ++ * Buffer needs to be large enough to hold the entry. If it's not, ++ * then the entry is not filled in but the size of the entry is still ++ * returned. The caller can check this by comparing the bufsize ++ * parameter with the returned entry size. If the entry size is ++ * larger than the buffer size, the operation failed. ++ * ++ * From the 'stbuf' argument the st_ino field and bits 12-15 of the ++ * st_mode field are used. The other fields are ignored. ++ * ++ * *off* should be any non-zero value that the filesystem can use to ++ * identify the current point in the directory stream. It does not ++ * need to be the actual physical position. A value of zero is ++ * reserved to mean "from the beginning", and should therefore never ++ * be used (the first call to fuse_add_direntry should be passed the ++ * offset of the second directory entry). ++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param stbuf the file attributes ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, ++ off_t off); ++ ++/** ++ * Add a directory entry to the buffer with the attributes ++ * ++ * See documentation of `fuse_add_direntry()` for more details. 
++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param e the directory entry ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); ++ ++/** ++ * Reply to ask for data fetch and output buffer preparation. ioctl ++ * will be retried with the specified input data fetched and output ++ * buffer prepared. ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param in_iov iovec specifying data to fetch from the caller ++ * @param in_count number of entries in in_iov ++ * @param out_iov iovec specifying addresses to write output to ++ * @param out_count number of entries in out_iov ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count); ++ ++/** ++ * Reply to finish ioctl ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param buf buffer containing output data ++ * @param size length of output data ++ */ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); ++ ++/** ++ * Reply to finish ioctl with iov buffer ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ */ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count); ++ ++/** ++ * Reply with poll result event mask ++ * ++ * @param req request handle ++ * @param revents poll result event mask ++ 
*/ ++int fuse_reply_poll(fuse_req_t req, unsigned revents); ++ ++/** ++ * Reply with offset ++ * ++ * Possible requests: ++ * lseek ++ * ++ * @param req request handle ++ * @param off offset of next data or hole ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lseek(fuse_req_t req, off_t off); ++ ++/* ----------------------------------------------------------- * ++ * Notification * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Notify IO readiness event ++ * ++ * For more information, please read comment for poll operation. ++ * ++ * @param ph poll handle to notify IO readiness event for ++ */ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Notify to invalidate cache for an inode. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * If the filesystem has writeback caching enabled, invalidating an ++ * inode will first trigger a writeback of all dirty pages. The call ++ * will block until all writeback requests have completed and the ++ * inode has been invalidated. It will, however, not wait for ++ * completion of pending writeback requests that have been issued ++ * before. ++ * ++ * If there are no dirty pages, this function will never block. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param off the offset in the inode where to start invalidating ++ * or negative to invalidate attributes only ++ * @param len the amount of cache to invalidate or 0 for all ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len); ++ ++/** ++ * Notify to invalidate parent attributes and the dentry matching ++ * parent/name ++ * ++ * To avoid a deadlock this function must not be called in the ++ * execution path of a related filesystem operation or within any code ++ * that could hold a lock that could be needed to execute such an ++ * operation. As of kernel 4.18, a "related operation" is a lookup(), ++ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() ++ * request for the parent, and a setattr(), unlink(), rmdir(), ++ * rename(), setxattr(), removexattr(), readdir() or readdirplus() ++ * request for the inode itself. ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen); ++ ++/** ++ * This function behaves like fuse_lowlevel_notify_inval_entry() with ++ * the following additional effect (at least as of Linux kernel 4.8): ++ * ++ * If the provided *child* inode matches the inode that is currently ++ * associated with the cached dentry, and if there are any inotify ++ * watches registered for the dentry, then the watchers are informed ++ * that the dentry has been deleted. 
++ * ++ * To avoid a deadlock this function must not be called while ++ * executing a related filesystem operation or while holding a lock ++ * that could be needed to execute such an operation (see the ++ * description of fuse_lowlevel_notify_inval_entry() for more ++ * details). ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.18. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param child inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen); ++ ++/** ++ * Store data to the kernel buffers ++ * ++ * Synchronously store data in the kernel buffers belonging to the ++ * given inode. The stored data is marked up-to-date (no read will be ++ * performed against it, unless it's invalidated or evicted from the ++ * cache). ++ * ++ * If the stored data overflows the current file size, then the size ++ * is extended, similarly to a write(2) on the filesystem. ++ * ++ * If this function returns an error, then the store wasn't fully ++ * completed, but it may have been partially completed. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param offset the starting offset into the file to store to ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++/** ++ * Retrieve data from the kernel buffers ++ * ++ * Retrieve data in the kernel buffers belonging to the given inode. ++ * If successful then the retrieve_reply() method will be called with ++ * the returned data. ++ * ++ * Only present pages are returned in the retrieve reply. Retrieving ++ * stops when it finds a non-present page and only data prior to that ++ * is returned. ++ * ++ * If this function returns an error, then the retrieve will not be ++ * completed and no reply will be sent. ++ * ++ * This function doesn't change the dirty state of pages in the kernel ++ * buffer. For dirty pages the write() method will be called ++ * regardless of having been retrieved previously. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param size the number of bytes to retrieve ++ * @param offset the starting offset into the file to retrieve from ++ * @param cookie user data to supply to the reply callback ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie); ++ ++ ++/* ----------------------------------------------------------- * ++ * Utility functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Get the userdata from the request ++ * ++ * @param req request handle ++ * @return the user data passed to fuse_session_new() ++ */ ++void *fuse_req_userdata(fuse_req_t req); ++ ++/** ++ * Get the context from the request ++ * ++ * The pointer returned by this function will only be valid for the ++ * request's lifetime ++ * ++ * @param req request handle ++ * @return the context structure ++ */ ++const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); ++ ++/** ++ * Get the current supplementary group IDs for the specified request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param req request handle ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); ++ ++/** ++ * Callback function for an interrupt ++ * ++ * @param req interrupted request ++ * @param data user data ++ */ ++typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); ++ ++/** ++ * Register/unregister callback for an interrupt ++ * ++ * If an interrupt has already happened, then the callback function is ++ * called from within this function, hence it's not possible for ++ * interrupts to be lost. ++ * ++ * @param req request handle ++ * @param func the callback function or NULL for unregister ++ * @param data user data passed to the callback function ++ */ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data); ++ ++/** ++ * Check if a request has already been interrupted ++ * ++ * @param req request handle ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_req_interrupted(fuse_req_t req); ++ ++ ++/* ----------------------------------------------------------- * ++ * Inquiry functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print low-level version information to stdout. ++ */ ++void fuse_lowlevel_version(void); ++ ++/** ++ * Print available low-level options to stdout. This is not an ++ * exhaustive list, but includes only those options that may be of ++ * interest to an end-user of a file system. ++ */ ++void fuse_lowlevel_help(void); ++ ++/** ++ * Print available options for `fuse_parse_cmdline()`. 
++ */ ++void fuse_cmdline_help(void); ++ ++/* ----------------------------------------------------------- * ++ * Filesystem setup & teardown * ++ * ----------------------------------------------------------- */ ++ ++struct fuse_cmdline_opts { ++ int singlethread; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ int clone_fd; ++ unsigned int max_idle_threads; ++}; ++ ++/** ++ * Utility function to parse common options for simple file systems ++ * using the low-level API. A help text that describes the available ++ * options can be printed with `fuse_cmdline_help`. A single ++ * non-option argument is treated as the mountpoint. Multiple ++ * non-option arguments will result in an error. ++ * ++ * If neither -o subtype= nor -o fsname= options are given, a new ++ * subtype option will be added and set to the basename of the program ++ * (the fsname will remain unset, and then defaults to "fuse"). ++ * ++ * Known options will be removed from *args*, unknown options will ++ * remain. ++ * ++ * @param args argument vector (input+output) ++ * @param opts output argument for parsed options ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_parse_cmdline(struct fuse_args *args, ++ struct fuse_cmdline_opts *opts); ++ ++/** ++ * Create a low level session. ++ * ++ * Returns a session structure suitable for passing to ++ * fuse_session_mount() and fuse_session_loop(). ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the general ++ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and ++ * -o default_permissions, but not ``-o use_ino``). Instead of `-o ++ * debug`, debugging may also be enabled with `-d` or `--debug`. ++ * ++ * If not all options are known, an error message is written to stderr ++ * and the function returns NULL. 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. To prevent accidentally passing an option in ++ * argv[0], this element must always be present (even if no options ++ * are specified). It may be set to the empty string ('\0') if no ++ * reasonable value can be provided. ++ * ++ * @param args argument vector ++ * @param op the (low-level) filesystem operations ++ * @param op_size sizeof(struct fuse_lowlevel_ops) ++ * @param userdata user data ++ * ++ * @return the fuse session on success, NULL on failure ++ **/ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param se session object ++ * ++ * @return 0 on success, -1 on failure. ++ **/ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++ ++/** ++ * Enter a single threaded, blocking event loop. ++ * ++ * When the event loop terminates because the connection to the FUSE ++ * kernel module has been closed, this function returns zero. This ++ * happens when the filesystem is unmounted regularly (by the ++ * filesystem owner or root running the umount(8) or fusermount(1) ++ * command), or if the connection is explicitly severed by writing ``1`` ++ * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only ++ * way to distinguish between these two conditions is to check if the ++ * filesystem is still mounted after the session loop returns. ++ * ++ * When some error occurs during request processing, the function ++ * returns a negated errno(3) value. ++ * ++ * If the loop has been terminated because of a signal handler ++ * installed by fuse_set_signal_handlers(), this function returns the ++ * (positive) signal value that triggered the exit. 
++ * ++ * @param se the session ++ * @return 0, -errno, or a signal value ++ */ ++int fuse_session_loop(struct fuse_session *se); ++ ++/** ++ * Enter a multi-threaded event loop. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param se the session ++ * @param config session loop configuration ++ * @return see fuse_session_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); ++#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) ++#else ++int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Flag a session as terminated. ++ * ++ * This function is invoked by the POSIX signal handlers, when ++ * registered using fuse_set_signal_handlers(). It will cause any ++ * running event loops to terminate on the next opportunity. ++ * ++ * @param se the session ++ */ ++void fuse_session_exit(struct fuse_session *se); ++ ++/** ++ * Reset the terminated flag of a session ++ * ++ * @param se the session ++ */ ++void fuse_session_reset(struct fuse_session *se); ++ ++/** ++ * Query the terminated flag of a session ++ * ++ * @param se the session ++ * @return 1 if exited, 0 if not exited ++ */ ++int fuse_session_exited(struct fuse_session *se); ++ ++/** ++ * Ensure that the file system is unmounted. ++ * ++ * In regular operation, the file system is typically unmounted by the ++ * user calling umount(8) or fusermount(1), which then terminates the ++ * FUSE session loop. However, the session loop may also terminate as ++ * a result of an explicit call to fuse_session_exit() (e.g. by a ++ * signal handler installed by fuse_set_signal_handlers()). 
In this ++ * case the filesystem remains mounted, but any attempt to access it ++ * will block (while the filesystem process is still running) or give ++ * an ESHUTDOWN error (after the filesystem process has terminated). ++ * ++ * If the communication channel with the FUSE kernel module is still ++ * open (i.e., if the session loop was terminated by an explicit call ++ * to fuse_session_exit()), this function will close it and unmount ++ * the filesystem. If the communication channel has been closed by the ++ * kernel, this method will do (almost) nothing. ++ * ++ * NOTE: The above semantics mean that if the connection to the kernel ++ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, ++ * this method will *not* unmount the filesystem. ++ * ++ * @param se the session ++ */ ++void fuse_session_unmount(struct fuse_session *se); ++ ++/** ++ * Destroy a session ++ * ++ * @param se the session ++ */ ++void fuse_session_destroy(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Custom event loop support * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Return file descriptor for communication with kernel. ++ * ++ * The file selector can be used to integrate FUSE with a custom event ++ * loop. Whenever data is available for reading on the provided fd, ++ * the event loop should call `fuse_session_receive_buf` followed by ++ * `fuse_session_process_buf` to process the request. ++ * ++ * The returned file descriptor is valid until `fuse_session_unmount` ++ * is called. ++ * ++ * @param se the session ++ * @return a file descriptor ++ */ ++int fuse_session_fd(struct fuse_session *se); ++ ++/** ++ * Process a raw request supplied in a generic buffer ++ * ++ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
++ * ++ * @param se the session ++ * @param buf the fuse_buf containing the request ++ */ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf); ++ ++/** ++ * Read a raw request from the kernel into the supplied buffer. ++ * ++ * Depending on file system options, system capabilities, and request ++ * size the request is either read into a memory buffer or spliced ++ * into a temporary pipe. ++ * ++ * @param se the session ++ * @param buf the fuse_buf to store the request in ++ * @return the actual size of the raw request, or -errno on error ++ */ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +new file mode 100644 +index 0000000..2f6663e +--- /dev/null ++++ b/tools/virtiofsd/fuse_misc.h +@@ -0,0 +1,59 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include <pthread.h> ++ ++/* ++ Versioned symbols cannot be used in some cases because they ++ - confuse the dynamic linker in uClibc ++ - are not supported on MacOSX (in MachO binary format) ++*/ ++#if (!defined(__UCLIBC__) && !defined(__APPLE__)) ++#define FUSE_SYMVER(x) __asm__(x) ++#else ++#define FUSE_SYMVER(x) ++#endif ++ ++#ifndef USE_UCLIBC ++#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) ++#else ++/* Is this hack still needed? 
*/ ++static inline void fuse_mutex_init(pthread_mutex_t *mut) ++{ ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); ++} ++#endif ++ ++#ifdef HAVE_STRUCT_STAT_ST_ATIM ++/* Linux */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) ++#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) ++/* FreeBSD */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) ++#else ++#define ST_ATIM_NSEC(stbuf) 0 ++#define ST_CTIM_NSEC(stbuf) 0 ++#define ST_MTIM_NSEC(stbuf) 0 ++#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) ++#endif +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +new file mode 100644 +index 0000000..d8573e7 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.h +@@ -0,0 +1,271 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#ifndef FUSE_OPT_H_ ++#define FUSE_OPT_H_ ++ ++/** @file ++ * ++ * This file defines the option parsing interface of FUSE ++ */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Option description ++ * ++ * This structure describes a single option, and the action associated ++ * with it, in case it matches. ++ * ++ * More than one such match may occur, in which case the action for ++ * each match is executed. ++ * ++ * There are three possible actions in case of a match: ++ * ++ * i) An integer (int or unsigned) variable determined by 'offset' is ++ * set to 'value' ++ * ++ * ii) The processing function is called, with 'value' as the key ++ * ++ * iii) An integer (any) or string (char *) variable determined by ++ * 'offset' is set to the value of an option parameter ++ * ++ * 'offset' should normally be either set to ++ * ++ * - 'offsetof(struct foo, member)' actions i) and iii) ++ * ++ * - -1 action ii) ++ * ++ * The 'offsetof()' macro is defined in the <stddef.h> header. ++ * ++ * The template determines which options match, and also has an ++ * effect on the action. Normally the action is either i) or ii), but ++ * if a format is present in the template, then action iii) is ++ * performed. ++ * ++ * The types of templates are: ++ * ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * themselves. Invalid values are "--" and anything beginning ++ * with "-o" ++ * ++ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or ++ * the relevant option in a comma separated option list ++ * ++ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) ++ * which have a parameter ++ * ++ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform ++ * action iii). ++ * ++ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as ++ * two separate arguments ++ * ++ * 6) "-x %s", etc. Combination of 4) and 5) ++ * ++ * If the format is "%s", memory is allocated for the string unlike with ++ * scanf(). 
The previous value (if non-NULL) stored at this location is ++ * freed. ++ */ ++struct fuse_opt { ++ /** Matching template and optional parameter formatting */ ++ const char *templ; ++ ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; ++ ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; ++}; ++ ++/** ++ * Key option. In case of a match, the processing function will be ++ * called with the specified key. ++ */ ++#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++ ++/** ++ * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * template value ++ */ ++#define FUSE_OPT_END { NULL, 0, 0 } ++ ++/** ++ * Argument list ++ */ ++struct fuse_args { ++ /** Argument count */ ++ int argc; ++ ++ /** Argument vector. NULL terminated */ ++ char **argv; ++ ++ /** Is 'argv' allocated? */ ++ int allocated; ++}; ++ ++/** ++ * Initializer for 'struct fuse_args' ++ */ ++#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++ ++/** ++ * Key value passed to the processing function if an option did not ++ * match any template ++ */ ++#define FUSE_OPT_KEY_OPT -1 ++ ++/** ++ * Key value passed to the processing function for all non-options ++ * ++ * Non-options are the arguments beginning with a character other than ++ * '-' or all arguments after the special '--' option ++ */ ++#define FUSE_OPT_KEY_NONOPT -2 ++ ++/** ++ * Special key value for options to keep ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned 1 ++ */ ++#define FUSE_OPT_KEY_KEEP -3 ++ ++/** ++ * Special key value for options to discard ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned zero ++ */ ++#define FUSE_OPT_KEY_DISCARD -4 ++ ++/** ++ * Processing function ++ * ++ * This function is called if ++ * - option did 
not match any 'struct fuse_opt' ++ * - argument is a non-option ++ * - option did match and offset was set to -1 ++ * ++ * The 'arg' parameter will always contain the whole argument or ++ * option including the parameter if exists. A two-argument option ++ * ("-x foo") is always converted to single argument option of the ++ * form "-xfoo" before this function is called. ++ * ++ * Options of the form '-ofoo' are passed to this function without the ++ * '-o' prefix. ++ * ++ * The return value of this function determines whether this argument ++ * is to be inserted into the output argument vector, or discarded. ++ * ++ * @param data is the user data passed to the fuse_opt_parse() function ++ * @param arg is the whole argument or option ++ * @param key determines why the processing function was called ++ * @param outargs the current output argument list ++ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept ++ */ ++typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, ++ struct fuse_args *outargs); ++ ++/** ++ * Option parsing function ++ * ++ * If 'args' was returned from a previous call to fuse_opt_parse() or ++ * it was constructed from ++ * ++ * A NULL 'args' is equivalent to an empty argument vector ++ * ++ * A NULL 'opts' is equivalent to an 'opts' array containing a single ++ * end marker ++ * ++ * A NULL 'proc' is equivalent to a processing function always ++ * returning '1' ++ * ++ * @param args is the input and output argument list ++ * @param data is the user data ++ * @param opts is the option description array ++ * @param proc is the processing function ++ * @return -1 on error, 0 on success ++ */ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ ++/** ++ * Add an option to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation 
error, 0 on success ++ */ ++int fuse_opt_add_opt(char **opts, const char *opt); ++ ++/** ++ * Add an option, escaping commas, to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt); ++ ++/** ++ * Add an argument to a NULL terminated argument vector ++ * ++ * @param args is the structure containing the current argument list ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg); ++ ++/** ++ * Add an argument at the specified position in a NULL terminated ++ * argument vector ++ * ++ * Adds the argument to the N-th position. This is useful for adding ++ * options at the beginning of the array which must not come after the ++ * special '--' option. ++ * ++ * @param args is the structure containing the current argument list ++ * @param pos is the position at which to add the argument ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); ++ ++/** ++ * Free the contents of argument list ++ * ++ * The structure itself is not freed ++ * ++ * @param args is the structure containing the argument list ++ */ ++void fuse_opt_free_args(struct fuse_args *args); ++ ++ ++/** ++ * Check if an option matches ++ * ++ * @param opts is the option description array ++ * @param opt is the option to match ++ * @return 1 if a match is found, 0 if not ++ */ ++int fuse_opt_match(const struct fuse_opt opts[], const char *opt); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +new file mode 100644 +index 0000000..6b77c33 +--- /dev/null ++++ 
b/tools/virtiofsd/passthrough_helpers.h +@@ -0,0 +1,76 @@ ++/* ++ * FUSE: Filesystem in Userspace ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE ++ */ ++ ++/* ++ * Creates files on the underlying file system in response to a FUSE_MKNOD ++ * operation ++ */ ++static int mknod_wrapper(int dirfd, const char *path, const char *link, ++ int mode, dev_t rdev) ++{ ++ int res; ++ ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) ++ res = close(res); ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if (S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); 
++#ifdef __FreeBSD__ ++ } else if (S_ISSOCK(mode)) { ++ struct sockaddr_un su; ++ int fd; ++ ++ if (strlen(path) >= sizeof(su.sun_path)) { ++ errno = ENAMETOOLONG; ++ return -1; ++ } ++ fd = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (fd >= 0) { ++ /* ++ * We must bind the socket to the underlying file ++ * system to create the socket file, even though ++ * we'll never listen on this socket. ++ */ ++ su.sun_family = AF_UNIX; ++ strncpy(su.sun_path, path, sizeof(su.sun_path)); ++ res = bindat(dirfd, fd, (struct sockaddr*)&su, ++ sizeof(su)); ++ if (res == 0) ++ close(fd); ++ } else { ++ res = -1; ++ } ++#endif ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } ++ ++ return res; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch new file mode 100644 index 0000000..27e71f2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch @@ -0,0 +1,193 @@ +From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:08 +0000 +Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-2-dgilbert@redhat.com> +Patchwork-id: 94122 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Remove fuse_req_getgroups that's unused in virtiofsd; it came in +from libfuse but we don't actually use it. It was called from +fuse_getgroups which we previously removed (but had left it's header +in). + +Coverity had complained about null termination in it, but removing +it is the easiest answer. + +Fixes: Coverity CID: 1413117 (String not null terminated) +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse.h | 20 ----------- + tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 21 ----------- + 3 files changed, 118 deletions(-) + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 7a4c713..aba13fe 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); + struct fuse_context *fuse_get_context(void); + + /** +- * Get the current supplementary group IDs for the current request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. 
+- * +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_getgroups(int size, gid_t list[]); +- +-/** + * Check if the current request has already been interrupted + * + * @return 1 if the request has been interrupted, 0 otherwise +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index de2e2e0..01c418a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) + return !!se->virtio_dev; + } + +-#ifdef linux +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; +- +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); +- +-retry: +- buf = malloc(bufsize); +- if (buf == NULL) { +- return -ENOMEM; +- } +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) { +- goto out_free; +- } +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if ((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) { +- goto out_free; +- } +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) { +- break; +- } +- +- s = end; +- if (ret < size) { +- list[ret] = val; +- } +- ret++; +- } +- +-out_free: +- free(buf); +- return ret; +-} +-#else /* linux */ +-/* +- * This is currently not implemented on other than Linux... 
+- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- (void)req; +- (void)size; +- (void)list; +- return -ENOSYS; +-} +-#endif +- + void fuse_session_exit(struct fuse_session *se) + { + se->exited = 1; +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 138041e..8f6d705 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); + + /** +- * Get the current supplementary group IDs for the specified request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. +- * +- * @param req request handle +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); +- +-/** + * Callback function for an interrupt + * + * @param req interrupted request +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch new file mode 100644 index 0000000..7f9c5bb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch @@ -0,0 +1,271 @@ +From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:48 +0100 +Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-14-dgilbert@redhat.com> +Patchwork-id: 93465 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +Signed-off-by: Xiao Yang +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 7 +++--- + tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- + tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 5 files changed, 13 insertions(+), 92 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5df946c..4d507f3 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ size_t len) + { + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + return 1; + } + +-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { + size_t copied = 0; + +@@ 
-249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); + if (res < 0) { + if (!copied) { + return res; +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bd9bf86..0cb33ac 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -605,48 +605,6 @@ enum fuse_buf_flags { + }; + + /** +- * Buffer copy flags +- */ +-enum fuse_buf_copy_flags { +- /** +- * Don't use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), +-}; +- +-/** + * Single data buffer + * + * Generic data buffer for I/O, extended attributes, etc... 
Data may +@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * + * @param dst destination buffer vector + * @param src source buffer vector +- * @param flags flags controlling the copy + * @return actual number of bytes copied or -errno on error + */ +-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + + /* + * Signal handling +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index eb0ec49..3da80de 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct fuse_bufvec *buf) + { + size_t len = fuse_buf_size(buf); +- (void)flags; + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; + struct fuse_out_header out; +@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + out.unique = req->unique; + out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); + return res; +@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv) + { + struct fuse_out_header out; + struct fuse_notify_store_out outarg; 
+@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); + if (res > 0) { + res = -res; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 12a84b4..2fa225d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + /** + * Reply with data copied/moved from buffer(s) + * +- * Zero copy data transfer ("splicing") will be used under +- * the following circumstances: +- * +- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and +- * 2. the kernel supports splicing from the fuse device +- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and +- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE +- * 4. The amount of data that is provided in file-descriptor backed +- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) +- * is at least twice the page size. +- * +- * In order for SPLICE_F_MOVE to be used, the following additional +- * conditions have to be fulfilled: +- * +- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and +- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in +- fuse_conn_info.capable), and +- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE +- * +- * Note that, if splice is used, the data is actually spliced twice: +- * once into a temporary pipe (to prepend header data), and then again +- * into the kernel. If some of the provided buffers are memory-backed, +- * the data in them is copied in step one and spliced in step two. +- * +- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags +- * are silently ignored. 
+- * + * Possible requests: + * read, readdir, getxattr, listxattr + * +@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * + * @param req request handle + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); + + /** + * Reply with data vector +@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + * @param ino the inode number + * @param offset the starting offset into the file to store to + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv); + + /* + * Utility functions +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9377718..126a56c 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + buf.buf[0].fd = fi->fh; + buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + +- res = fuse_buf_copy(&out_buf, in_buf, 0); ++ res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch new file mode 100644 index 0000000..e1a3cd1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch @@ -0,0 +1,72 @@ +From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:14 +0100 +Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-100-dgilbert@redhat.com> +Patchwork-id: 93553 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If an application wants to do direct IO and opens a file with O_DIRECT +in guest, that does not necessarily mean that we need to bypass page +cache on host as well. So reset this flag on host. + +If somebody needs to bypass page cache on host as well (and it is safe to +do so), we can add a knob in daemon later to control this behavior. + +I check virtio-9p and they do reset O_DIRECT flag. + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ccbbec1..948cb19 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. 
If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; +@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= ~O_APPEND; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch b/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch new file mode 100644 index 0000000..5453fda --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch @@ -0,0 +1,199 @@ +From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:03 +0100 +Subject: [PATCH 032/116] virtiofsd: Send replies to messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-29-dgilbert@redhat.com> +Patchwork-id: 93485 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Route fuse out messages back through the same queue elements +that had the command that triggered the request. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++ + tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- + tools/virtiofsd/fuse_virtio.h | 4 ++ + 3 files changed, 111 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index af09fa2..380d93b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + } + ++ if (fuse_lowlevel_is_virtio(se)) { ++ return virtio_send_msg(se, ch, iov, count); ++ } ++ + abort(); /* virtio should have taken it before here */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 3841b20..05d0e29 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,9 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ ++ /* The element for the command currently being processed */ ++ VuVirtqElement *qe; + }; + + /* +@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, + } + } + ++/* ++ * Copy from one iov to another, the given number of bytes ++ * The caller must have checked sizes. ++ */ ++static void copy_iov(struct iovec *src_iov, int src_count, ++ struct iovec *dst_iov, int dst_count, size_t to_copy) ++{ ++ size_t dst_offset = 0; ++ /* Outer loop copies 'src' elements */ ++ while (to_copy) { ++ assert(src_count); ++ size_t src_len = src_iov[0].iov_len; ++ size_t src_offset = 0; ++ ++ if (src_len > to_copy) { ++ src_len = to_copy; ++ } ++ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ ++ while (src_len) { ++ assert(dst_count); ++ size_t dst_len = dst_iov[0].iov_len - dst_offset; ++ if (dst_len > src_len) { ++ dst_len = src_len; ++ } ++ ++ memcpy(dst_iov[0].iov_base + dst_offset, ++ src_iov[0].iov_base + src_offset, dst_len); ++ src_len -= dst_len; ++ to_copy -= dst_len; ++ src_offset += dst_len; ++ dst_offset += dst_len; ++ ++ assert(dst_offset <= dst_iov[0].iov_len); ++ if (dst_offset == dst_iov[0].iov_len) { ++ dst_offset = 0; ++ dst_iov++; ++ dst_count--; ++ } ++ } ++ src_iov++; ++ src_count--; ++ } ++} ++ ++/* ++ * Called back by ll whenever it wants to send a reply/message back ++ * The 1st element of the iov starts with the fuse_out_header ++ * 'unique'==0 means it's a notify message. ++ */ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! */ ++ ++ size_t tosend_len = iov_size(iov, count); ++ ++ /* unique == 0 is notification, which we don't support */ ++ assert(out->unique); ++ /* For virtio we always have ch */ ++ assert(ch); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ return -E2BIG; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ return -E2BIG; ++ } ++ ++ copy_iov(iov, count, in_sg, in_num, tosend_len); ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++ return 0; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) + + /* TODO! Endianness of header */ + +- /* TODO: Fixup fuse_send_msg */ + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + +- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ +- vu_queue_notify(dev, q); +- ++ qi->qe = NULL; + free(elem); + elem = NULL; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 23026d6..135a148 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); + + int virtio_loop(struct fuse_session *se); + ++ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Show-submounts.patch b/SOURCES/kvm-virtiofsd-Show-submounts.patch new file mode 100644 index 0000000..d45a030 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Show-submounts.patch @@ -0,0 +1,51 @@ +From 717373379510cd6ecf8c6d0e1aae65edfac4551d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:58 +0100 +Subject: [PATCH 7/9] virtiofsd: Show submounts + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200505163600.22956-6-dgilbert@redhat.com> +Patchwork-id: 96273 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 5/7] virtiofsd: Show submounts +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Michael S. Tsirkin + +From: Max Reitz + +Currently, setup_mounts() bind-mounts the shared directory without +MS_REC. This makes all submounts disappear. + +Pass MS_REC so that the guest can see submounts again. + +Fixes: 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc +Signed-off-by: Max Reitz +Message-Id: <20200424133516.73077-1-mreitz@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert + Changed Fixes to point to the commit with the problem rather than + the commit that turned it on +(cherry picked from commit ace0829c0d08f0e5f1451e402e94495bc2166772) + +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 73d8405..614ba55 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2670,7 +2670,7 @@ static void setup_mounts(const char *source) + int oldroot; + int newroot; + +- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) { + fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); + exit(1); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-queue-threads.patch b/SOURCES/kvm-virtiofsd-Start-queue-threads.patch new file mode 100644 index 0000000..8b03cd6 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-queue-threads.patch @@ -0,0 +1,165 @@ +From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:00 +0100 +Subject: [PATCH 029/116] virtiofsd: Start queue threads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-26-dgilbert@redhat.com> +Patchwork-id: 93479 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Start a thread for each queue when we get notified it's been started. + +Signed-off-by: Dr. David Alan Gilbert +fix by: +Signed-off-by: Jun Piao +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 4819e56..2a94bb3 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,6 +11,7 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -30,6 +31,15 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_QueueInfo { ++ pthread_t thread; ++ struct fv_VuDev *virtio_dev; ++ ++ /* Our queue index, corresponds to array position */ ++ int qidx; ++ int kick_fd; ++}; ++ + /* + * We pass the dev element into libvhost-user + * and then use it to get back to the outer +@@ -38,6 +48,13 @@ + struct fv_VuDev { + VuDev dev; + struct fuse_session *se; ++ ++ /* ++ * The following pair of fields are only accessed in the main ++ * virtio_loop ++ */ ++ size_t nqueues; ++ struct fv_QueueInfo **qi; + }; + + /* From spec */ +@@ -83,6 
+100,75 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++static void *fv_queue_thread(void *opaque) ++{ ++ struct fv_QueueInfo *qi = opaque; ++ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, ++ qi->qidx, qi->kick_fd); ++ while (1) { ++ /* TODO */ ++ } ++ ++ return NULL; ++} ++ ++/* Callback from libvhost-user on start or stop of a queue */ ++static void fv_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); ++ struct fv_QueueInfo *ourqi; ++ ++ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, ++ started); ++ assert(qidx >= 0); ++ ++ /* ++ * Ignore additional request queues for now. passthrough_ll.c must be ++ * audited for thread-safety issues first. It was written with a ++ * well-behaved client in mind and may not protect against all types of ++ * races yet. ++ */ ++ if (qidx > 1) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: multiple request queues not yet implemented, please only " ++ "configure 1 request queue\n", ++ __func__); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (started) { ++ /* Fire up a thread to watch this queue */ ++ if (qidx >= vud->nqueues) { ++ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); ++ assert(vud->qi); ++ memset(vud->qi + vud->nqueues, 0, ++ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); ++ vud->nqueues = qidx + 1; ++ } ++ if (!vud->qi[qidx]) { ++ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); ++ assert(vud->qi[qidx]); ++ vud->qi[qidx]->virtio_dev = vud; ++ vud->qi[qidx]->qidx = qidx; ++ } else { ++ /* Shouldn't have been started */ ++ assert(vud->qi[qidx]->kick_fd == -1); ++ } ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", ++ __func__, qidx); ++ assert(0); ++ } ++ } else { ++ /* TODO: Kill the thread */ ++ 
assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = -1; ++ } ++} ++ + static bool fv_queue_order(VuDev *dev, int qidx) + { + return false; +@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { + .get_features = fv_get_features, + .set_features = fv_set_features, + ++ /* Don't need process message, we've not got any at vhost-user level */ ++ .queue_set_started = fv_queue_set_started, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch new file mode 100644 index 0000000..2022480 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch @@ -0,0 +1,200 @@ +From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:02 +0100 +Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-28-dgilbert@redhat.com> +Patchwork-id: 93484 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pop queue elements off queues, copy the data from them and +pass that to fuse. + + Note: 'out' in a VuVirtqElement is from QEMU + 'in' in libfuse is into the daemon + + So we read from the out iov's to get a fuse_in_header + +When we get a kick we've got to read all the elements until the queue +is empty. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 + + tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 98 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index ec04449..1126723 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + + struct fv_VuDev; ++struct fv_QueueInfo; + + struct fuse_req { + struct fuse_session *se; +@@ -75,6 +76,7 @@ struct fuse_chan { + pthread_mutex_t lock; + int ctr; + int fd; ++ struct fv_QueueInfo *qi; + }; + + /** +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05e7258..3841b20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -12,6 +12,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/iov.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -32,6 +33,7 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; + struct fv_VuDev *virtio_dev; +@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* ++ * Copy from an iovec into a fuse_buf (memory only) ++ * Caller must ensure there is space ++ */ ++static void copy_from_iov(struct fuse_buf *buf, size_t out_num, ++ const struct iovec *out_sg) ++{ ++ void *dest = buf->mem; ++ ++ while (out_num) { ++ size_t onelen = out_sg->iov_len; ++ memcpy(dest, out_sg->iov_base, onelen); ++ dest += onelen; ++ out_sg++; ++ out_num--; ++ } ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ 
struct fuse_session *se = qi->virtio_dev->se; ++ struct fuse_chan ch; ++ struct fuse_buf fbuf; ++ ++ fbuf.mem = NULL; ++ fbuf.flags = 0; ++ ++ fuse_mutex_init(&ch.lock); ++ ch.fd = (int)0xdaff0d111; ++ ch.qi = qi; ++ + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } +- if (qi->virtio_dev->se->debug) { +- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, +- qi->qidx, (size_t)evalue); ++ /* out is from guest, in is too guest */ ++ unsigned int in_bytes, out_bytes; ++ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", ++ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); ++ ++ while (1) { ++ /* ++ * An element contains one request and the space to send our ++ * response They're spread over multiple descriptors in a ++ * scatter/gather set and we can't trust the guest to keep them ++ * still; so copy in/out. ++ */ ++ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ ++ if (!fbuf.mem) { ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ } ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", __func__, ++ elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ copy_from_iov(&fbuf, out_num, out_sg); ++ fbuf.size = out_len; ++ ++ /* TODO! Endianness of header */ ++ ++ /* TODO: Fixup fuse_send_msg */ ++ /* TODO: Add checks for fuse_session_exited */ ++ fuse_session_process_buf_int(se, &fbuf, &ch); ++ ++ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ ++ vu_queue_notify(dev, q); ++ ++ free(elem); ++ elem = NULL; + } + } ++ pthread_mutex_destroy(&ch.lock); ++ free(fbuf.mem); + + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch new file mode 100644 index 0000000..7b50118 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch @@ -0,0 +1,247 @@ +From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:57 +0100 +Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-23-dgilbert@redhat.com> +Patchwork-id: 93477 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Listen on our unix socket for the connection from QEMU, when we get it +initialise vhost-user and dive into our own loop variant (currently +dummy). + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 4 ++ + tools/virtiofsd/fuse_lowlevel.c | 5 +++ + tools/virtiofsd/fuse_lowlevel.h | 7 ++++ + tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- + tools/virtiofsd/fuse_virtio.h | 2 + + tools/virtiofsd/passthrough_ll.c | 7 +--- + 6 files changed, 106 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 82d6ac7..ec04449 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -13,6 +13,8 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + ++struct fv_VuDev; ++ + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -65,6 +67,8 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_socketfd; ++ struct fv_VuDev *virtio_dev; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5df124e..af09fa2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) + { + } + ++int fuse_lowlevel_is_virtio(struct fuse_session *se) ++{ ++ return se->vu_socket_path != NULL; ++} ++ + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 2fa225d..f6b3470 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + */ + int fuse_req_interrupted(fuse_req_t req); + ++/** ++ * Check if the session is connected via virtio ++ * ++ * @param se session object ++ * @return 1 if the session is a virtio session ++ */ ++int fuse_lowlevel_is_virtio(struct fuse_session *se); + + /* + * Inquiry functions +diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index cbef6ff..2ae3c76 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -19,18 +19,78 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include + ++#include "contrib/libvhost-user/libvhost-user.h" ++ ++/* ++ * We pass the dev element into libvhost-user ++ * and then use it to get back to the outer ++ * container for other data. ++ */ ++struct fv_VuDev { ++ VuDev dev; ++ struct fuse_session *se; ++}; ++ + /* From spec */ + struct virtio_fs_config { + char tag[36]; + uint32_t num_queues; + }; + ++/* ++ * Callback from libvhost-user if there's a new fd we're supposed to listen ++ * to, typically a queue kick? ++ */ ++static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, ++ void *data) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* ++ * Callback from libvhost-user if we're no longer supposed to listen on an fd ++ */ ++static void fv_remove_watch(VuDev *dev, int fd) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* Callback from libvhost-user to panic */ ++static void fv_panic(VuDev *dev, const char *err) ++{ ++ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); ++ /* TODO: Allow reconnects?? 
*/ ++ exit(EXIT_FAILURE); ++} ++ ++static bool fv_queue_order(VuDev *dev, int qidx) ++{ ++ return false; ++} ++ ++static const VuDevIface fv_iface = { ++ /* TODO: Add other callbacks */ ++ .queue_is_processed_in_order = fv_queue_order, ++}; ++ ++int virtio_loop(struct fuse_session *se) ++{ ++ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); ++ ++ while (1) { ++ /* TODO: Add stuffing */ ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++} ++ + int virtio_session_mount(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + +- return -1; ++ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", ++ __func__); ++ int data_sock = accept(listen_sock, NULL, NULL); ++ if (data_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); ++ close(listen_sock); ++ return -1; ++ } ++ close(listen_sock); ++ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", ++ __func__); ++ ++ /* TODO: Some cleanup/deallocation! 
*/ ++ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); ++ if (!se->virtio_dev) { ++ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); ++ close(data_sock); ++ return -1; ++ } ++ ++ se->vu_socketfd = data_sock; ++ se->virtio_dev->se = se; ++ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, ++ fv_remove_watch, &fv_iface); ++ ++ return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 8f2edb6..23026d6 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -20,4 +20,6 @@ struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); + ++int virtio_loop(struct fuse_session *se); ++ + #endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc9b264..037c5d7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) + fuse_daemonize(opts.foreground); + + /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) { +- ret = fuse_session_loop(se); +- } else { +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- } ++ ret = virtio_loop(se); + + fuse_session_unmount(se); + err_out3: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch b/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch new file mode 100644 index 0000000..e60364a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch @@ -0,0 +1,355 @@ +From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:03 +0100 +Subject: [PATCH 092/116] virtiofsd: Support remote posix locks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-89-dgilbert@redhat.com> +Patchwork-id: 93537 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Doing posix locks within guest kernel are not sufficient if a file/dir +is being shared by multiple guests. So we need the notion of daemon doing +the locks which are visible to rest of the guests. + +Given posix locks are per process, one can not call posix lock API on host, +otherwise bunch of basic posix locks properties are broken. For example, +If two processes (A and B) in guest open the file and take locks on different +sections of file, if one of the processes closes the fd, it will close +fd on virtiofsd and all posix locks on file will go away. This means if +process A closes the fd, then locks of process B will go away too. + +Similar other problems exist too. + +This patch set tries to emulate posix locks while using open file +description locks provided on Linux. + +Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable +or disable posix locking in daemon. By default it is enabled. + +There are few issues though. + +- GETLK() returns pid of process holding lock. As we are emulating locks + using OFD, and these locks are not per process and don't return pid + of process, so GETLK() in guest does not return process pid. + +- As of now only F_SETLK is supported and not F_SETLKW. We can't block + the thread in virtiofsd for arbitrary long duration as there is only + one thread serving the queue. That means unlock request will not make + it to daemon and F_SETLKW will block infinitely and bring virtio-fs + to a halt. This is a solvable problem though and will require significant + changes in virtiofsd and kernel. Left as a TODO item for now. 
+ +Signed-off-by: Vivek Goyal +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 3 + + tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 192 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5672024..33749bf 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o posix_lock|no_posix_lock\n" ++ " enable/disable remote posix lock\n" ++ " default: posix_lock\n" + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 05b5f89..9414935 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -67,6 +67,12 @@ + #include "passthrough_helpers.h" + #include "seccomp.h" + ++/* Keep track of inode posix locks for each owner. 
*/ ++struct lo_inode_plock { ++ uint64_t lock_owner; ++ int fd; /* fd for OFD locks */ ++}; ++ + struct lo_map_elem { + union { + struct lo_inode *inode; +@@ -95,6 +101,8 @@ struct lo_inode { + struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; ++ pthread_mutex_t plock_mutex; ++ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + }; + + struct lo_cred { +@@ -114,6 +122,7 @@ struct lo_data { + int norace; + int writeback; + int flock; ++ int posix_lock; + int xattr; + char *source; + double timeout; +@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { + { "source=%s", offsetof(struct lo_data, source), 0 }, + { "flock", offsetof(struct lo_data, flock), 1 }, + { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, ++ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, + { "xattr", offsetof(struct lo_data, xattr), 1 }, + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, +@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ ++ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { ++ if (lo->posix_lock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); ++ conn->want |= FUSE_CAP_POSIX_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); ++ conn->want &= ~FUSE_CAP_POSIX_LOCKS; ++ } ++ } ++ + if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); +@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + return p; + } + ++/* value_destroy_func for posix_locks GHashTable */ ++static void posix_locks_value_destroy(gpointer data) ++{ ++ struct lo_inode_plock *plock = data; ++ ++ 
/* ++ * We had used open() for locks and had only one fd. So ++ * closing this fd should release all OFD locks. ++ */ ++ close(plock->fd); ++ free(plock); ++} ++ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + newfd = -1; + inode->key.ino = e->attr.st_ino; + inode->key.dev = e->attr.st_dev; ++ pthread_mutex_init(&inode->plock_mutex, NULL); ++ inode->posix_locks = g_hash_table_new_full( ++ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + if (!inode->refcount) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); ++ if (g_hash_table_size(inode->posix_locks)) { ++ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); ++ } ++ g_hash_table_destroy(inode->posix_locks); ++ pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1516,6 +1559,136 @@ out: + } + } + ++/* Should be called with inode->plock_mutex held */ ++static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, ++ struct lo_inode *inode, ++ uint64_t lock_owner, ++ pid_t pid, int *err) ++{ ++ struct lo_inode_plock *plock; ++ char procname[64]; ++ int fd; ++ ++ plock = ++ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); ++ ++ if (plock) { ++ return plock; ++ } ++ ++ plock = malloc(sizeof(struct lo_inode_plock)); ++ if (!plock) { ++ *err = ENOMEM; ++ return NULL; ++ } ++ ++ /* Open another instance of file which can be used for ofd locks. */ ++ sprintf(procname, "%i", inode->fd); ++ ++ /* TODO: What if file is not writable? 
*/ ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd == -1) { ++ *err = errno; ++ free(plock); ++ return NULL; ++ } ++ ++ plock->lock_owner = lock_owner; ++ plock->fd = fd; ++ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), ++ plock); ++ return plock; ++} ++ ++static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getlk(ino=%" PRIu64 ", flags=%d)" ++ " owner=0x%lx, l_type=%d l_start=0x%lx" ++ " l_len=0x%lx\n", ++ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, ++ lock->l_len); ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ ret = fcntl(plock->fd, F_OFD_GETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ ++ if (saverr) { ++ fuse_reply_err(req, saverr); ++ } else { ++ fuse_reply_lock(req, lock); ++ } ++} ++ ++static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setlk(ino=%" PRIu64 ", flags=%d)" ++ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" ++ " l_start=0x%lx l_len=0x%lx\n", ++ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, ++ lock->l_whence, lock->l_start, lock->l_len); ++ ++ if (sleep) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, 
EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ /* TODO: Is it alright to modify flock? */ ++ lock->l_pid = 0; ++ ret = fcntl(plock->fd, F_OFD_SETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, saverr); ++} ++ + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { +@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ /* An fd is going away. Cleanup associated posix locks */ ++ pthread_mutex_lock(&inode->plock_mutex); ++ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); ++ pthread_mutex_unlock(&inode->plock_mutex); ++ + res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { + .releasedir = lo_releasedir, + .fsyncdir = lo_fsyncdir, + .create = lo_create, ++ .getlk = lo_getlk, ++ .setlk = lo_setlk, + .open = lo_open, + .release = lo_release, + .flush = lo_flush, +@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) + struct lo_data lo = { + .debug = 0, + .writeback = 0, ++ .posix_lock = 1, + .proc_self_fd = -1, + }; + struct lo_map_elem *root_elem; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch b/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch new file mode 100644 index 0000000..f3f1e85 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch @@ -0,0 +1,1582 @@ +From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:44 +0100 +Subject: [PATCH 013/116] virtiofsd: Trim down imported files +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-10-dgilbert@redhat.com> +Patchwork-id: 93463 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +There's a lot of the original fuse code we don't need; trim them down. + +Signed-off-by: Dr. David Alan Gilbert +with additional trimming by: +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Xiao Yang +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 71 +--- + tools/virtiofsd/fuse.h | 46 --- + tools/virtiofsd/fuse_common.h | 32 -- + tools/virtiofsd/fuse_i.h | 41 --- + tools/virtiofsd/fuse_log.h | 8 - + tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 28 -- + tools/virtiofsd/fuse_opt.h | 8 - + tools/virtiofsd/helper.c | 143 ------- + tools/virtiofsd/passthrough_helpers.h | 26 -- + tools/virtiofsd/passthrough_ll.c | 1 - + 11 files changed, 8 insertions(+), 1071 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5ab9b87..aefb7db 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + return copied; + } + +-#ifdef HAVE_SPLICE +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- int splice_flags = 0; +- off_t *srcpos = NULL; +- off_t *dstpos = NULL; +- off_t srcpos_val; +- off_t dstpos_val; +- ssize_t res; +- size_t copied = 0; +- +- if (flags & FUSE_BUF_SPLICE_MOVE) +- splice_flags |= SPLICE_F_MOVE; +- if (flags & FUSE_BUF_SPLICE_NONBLOCK) +- splice_flags |= SPLICE_F_NONBLOCK; +- +- if (src->flags & FUSE_BUF_FD_SEEK) { +- srcpos_val = src->pos + src_off; +- srcpos = &srcpos_val; +- } +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- dstpos_val = dst->pos + dst_off; +- dstpos = &dstpos_val; +- } +- +- while (len) { +- res = splice(src->fd, srcpos, dst->fd, dstpos, len, +- splice_flags); +- if (res == -1) { +- if (copied) +- break; +- +- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) +- return -errno; +- +- /* Maybe splice is not supported for this combination */ +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, +- len); 
+- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY) && +- !(dst->flags & FUSE_BUF_FD_RETRY)) { +- break; +- } +- +- len -= res; +- } +- +- return copied; +-} +-#else +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- (void) flags; +- +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +-} +-#endif +- +- + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + return fuse_buf_write(dst, dst_off, src, src_off, len); + } else if (!dst_is_fd) { + return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else if (flags & FUSE_BUF_NO_SPLICE) { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } else { +- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } + } + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 883f6e5..3202fba 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -25,10 +25,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Basic FUSE API * + * ----------------------------------------------------------- */ +@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); + void fuse_exit(struct fuse *f); + + /** +- * FUSE event loop with multiple threads +- * +- * Requests from the kernel are processed, and the appropriate +- * operations are called. Request are processed in parallel by +- * distributing them between multiple threads. 
+- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in +- * single-threaded mode, and that you will not have to worry about reentrancy, +- * though you will have to worry about recursive lookups. In single-threaded +- * mode, FUSE will wait for one callback to return before calling another. +- * +- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make +- * multiple simultaneous calls into the various callback functions given by your +- * fuse_operations record. +- * +- * If you are using multiple threads, you can enjoy all the parallel execution +- * and interactive response benefits of threads, and you get to enjoy all the +- * benefits of race conditions and locking bugs, too. Ensure that any code used +- * in the callback function of fuse_operations is also thread-safe. +- * +- * @param f the FUSE handle +- * @param config loop configuration +- * @return see fuse_session_loop() +- * +- * See also: fuse_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_loop_mt_31(struct fuse *f, int clone_fd); +-#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) +-#else +-int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); +-#endif +- +-/** + * Get the current context + * + * The context is only valid for the duration of a filesystem +@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); + */ + int fuse_open_channel(const char *mountpoint, const char *options); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 2d686b2..bf8f8cc 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -28,10 +28,6 @@ + #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION 
FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Information about an open file. + * +@@ -100,30 +96,6 @@ struct fuse_file_info { + uint32_t poll_events; + }; + +-/** +- * Configuration parameters passed to fuse_session_loop_mt() and +- * fuse_loop_mt(). +- */ +-struct fuse_loop_config { +- /** +- * whether to use separate device fds for each thread +- * (may increase performance) +- */ +- int clone_fd; +- +- /** +- * The maximum number of available worker threads before they +- * start to get deleted when they become idle. If not +- * specified, the default is 10. +- * +- * Adjusting this has performance implications; a very small number +- * of threads in the pool will cause a lot of thread creation and +- * deletion overhead and performance may suffer. When set to 0, a new +- * thread will be created to service every operation. +- */ +- unsigned int max_idle_threads; +-}; +- + /************************************************************************** + * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * + **************************************************************************/ +@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + # error only API version 30 or greater is supported + #endif + +-#ifdef __cplusplus +-} +-#endif +- + + /* + * This interface uses 64 bit off_t. 
+diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index d38b630..b39522e 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -9,8 +9,6 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + +-struct mount_opts; +- + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -45,7 +43,6 @@ struct fuse_session { + char *mountpoint; + volatile int exited; + int fd; +- struct mount_opts *mo; + int debug; + int deny_others; + struct fuse_lowlevel_ops op; +@@ -58,7 +55,6 @@ struct fuse_session { + struct fuse_req interrupts; + pthread_mutex_t lock; + int got_destroy; +- pthread_key_t pipe_key; + int broken_splice_nonblock; + uint64_t notify_ctr; + struct fuse_notify_req notify_list; +@@ -87,53 +83,16 @@ struct fuse_module { + int ctr; + }; + +-/* ----------------------------------------------------------- * +- * Channel interface (when using -o clone_fd) * +- * ----------------------------------------------------------- */ +- +-/** +- * Obtain counted reference to the channel +- * +- * @param ch the channel +- * @return the channel +- */ +-struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); +- +-/** +- * Drop counted reference to a channel +- * +- * @param ch the channel +- */ +-void fuse_chan_put(struct fuse_chan *ch); +- +-struct mount_opts *parse_mount_opts(struct fuse_args *args); +-void destroy_mount_opts(struct mount_opts *mo); +-void fuse_mount_version(void); +-unsigned get_max_read(struct mount_opts *o); +-void fuse_kern_unmount(const char *mountpoint, int fd); +-int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); +- + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count); + void fuse_free_req(fuse_req_t req); + +-void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); +- +-int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- 
struct fuse_chan *ch); + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch); + +-struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); +-int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); +-int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); + + #define FUSE_MAX_MAX_PAGES 256 + #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 +- +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 5e112e0..0af700d 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -16,10 +16,6 @@ + + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Log severity level + * +@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); + */ + void fuse_log(enum fuse_log_level level, const char *fmt, ...); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index f2d7038..e6fa247 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -16,7 +16,6 @@ + #include "fuse_kernel.h" + #include "fuse_opt.h" + #include "fuse_misc.h" +-#include "mount_util.h" + + #include + #include +@@ -28,12 +27,6 @@ + #include + #include + +-#ifndef F_LINUX_SPECIFIC_BASE +-#define F_LINUX_SPECIFIC_BASE 1024 +-#endif +-#ifndef F_SETPIPE_SZ +-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +-#endif + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) +@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) + req->u.ni.data = NULL; + list_del_req(req); + ctr = --req->ctr; +- fuse_chan_put(req->ch); + req->ch = NULL; + pthread_mutex_unlock(&se->lock); + if (!ctr) +@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan 
*ch, + } + } + +- ssize_t res = writev(ch ? ch->fd : se->fd, +- iov, count); +- int err = errno; +- +- if (res == -1) { +- assert(se != NULL); +- +- /* ENOENT means the operation was interrupted */ +- if (!fuse_session_exited(se) && err != ENOENT) +- perror("fuse: writing device"); +- return -err; +- } +- ++ abort(); /* virtio should have taken it before here */ + return 0; + } + +@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + struct fuse_bufvec *buf, + size_t len) + { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- int res; +- + /* Optimize common case */ + if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- return res; +- +- mem_buf.buf[0].mem = mbuf; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res < 0) { +- free(mbuf); +- return -res; +- } +- len = res; +- +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- +- return res; +-} +- +-struct fuse_ll_pipe { +- size_t size; +- int can_grow; +- int pipe[2]; +-}; +- +-static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) +-{ +- close(llp->pipe[0]); +- close(llp->pipe[1]); +- free(llp); +-} +- +-#ifdef HAVE_SPLICE +-#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) +-static int fuse_pipe(int fds[2]) +-{ +- int rv = pipe(fds); +- +- if (rv == -1) +- return rv; +- +- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || +- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { +- close(fds[0]); +- close(fds[1]); +- rv = -1; +- } +- return rv; +-} +-#else +-static int fuse_pipe(int fds[2]) +-{ +- return pipe2(fds, O_CLOEXEC | 
O_NONBLOCK); +-} +-#endif +- +-static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp == NULL) { +- int res; +- +- llp = malloc(sizeof(struct fuse_ll_pipe)); +- if (llp == NULL) +- return NULL; +- +- res = fuse_pipe(llp->pipe); +- if (res == -1) { +- free(llp); +- return NULL; +- } +- +- /* +- *the default size is 16 pages on linux +- */ +- llp->size = pagesize * 16; +- llp->can_grow = 1; +- +- pthread_setspecific(se->pipe_key, llp); +- } +- +- return llp; +-} +-#endif +- +-static void fuse_ll_clear_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp) { +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- } +-} +- +-#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) +-static int read_back(int fd, char *buf, size_t len) +-{ +- int res; +- +- res = read(fd, buf, len); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); +- return -EIO; +- } +- if (res != len) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); +- return -EIO; +- } ++ abort(); /* Will have taken vhost path */ + return 0; + } + +-static int grow_pipe_to_max(int pipefd) +-{ +- int max; +- int res; +- int maxfd; +- char buf[32]; +- +- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); +- if (maxfd < 0) +- return -errno; +- +- res = read(maxfd, buf, sizeof(buf) - 1); +- if (res < 0) { +- int saved_errno; +- +- saved_errno = errno; +- close(maxfd); +- return -saved_errno; +- } +- close(maxfd); +- buf[res] = '\0'; +- +- max = atoi(buf); +- res = fcntl(pipefd, F_SETPIPE_SZ, max); +- if (res < 0) +- return -errno; +- return max; +-} +- +-static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) +-{ +- int res; 
+- size_t len = fuse_buf_size(buf); +- struct fuse_out_header *out = iov[0].iov_base; +- struct fuse_ll_pipe *llp; +- int splice_flags; +- size_t pipesize; +- size_t total_fd_size; +- size_t idx; +- size_t headerlen; +- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); +- +- if (se->broken_splice_nonblock) +- goto fallback; +- +- if (flags & FUSE_BUF_NO_SPLICE) +- goto fallback; +- +- total_fd_size = 0; +- for (idx = buf->idx; idx < buf->count; idx++) { +- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { +- total_fd_size = buf->buf[idx].size; +- if (idx == buf->idx) +- total_fd_size -= buf->off; +- } +- } +- if (total_fd_size < 2 * pagesize) +- goto fallback; +- +- if (se->conn.proto_minor < 14 || +- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- +- headerlen = iov_length(iov, iov_count); +- +- out->len = headerlen + len; +- +- /* +- * Heuristic for the required pipe size, does not work if the +- * source contains less than page size fragments +- */ +- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; +- +- if (llp->size < pipesize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); +- if (res == -1) { +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- llp->can_grow = 0; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < pipesize) +- goto fallback; +- } +- +- +- res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); +- if (res == -1) +- goto fallback; +- +- if (res != headerlen) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, +- headerlen); +- goto clear_pipe; +- } +- +- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; +- pipe_buf.buf[0].fd = llp->pipe[1]; +- +- res = fuse_buf_copy(&pipe_buf, buf, +- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); +- if (res < 0) { +- if (res == -EAGAIN || res == -EINVAL) { +- /* +- * Should only get EAGAIN on 
kernels with +- * broken SPLICE_F_NONBLOCK support (<= +- * 2.6.35) where this error or a short read is +- * returned even if the pipe itself is not +- * full +- * +- * EINVAL might mean that splice can't handle +- * this combination of input and output. +- */ +- if (res == -EAGAIN) +- se->broken_splice_nonblock = 1; +- +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- goto fallback; +- } +- res = -res; +- goto clear_pipe; +- } +- +- if (res != 0 && res < len) { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- size_t now_len = res; +- /* +- * For regular files a short count is either +- * 1) due to EOF, or +- * 2) because of broken SPLICE_F_NONBLOCK (see above) +- * +- * For other inputs it's possible that we overflowed +- * the pipe because of small buffer fragments. +- */ +- +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- goto clear_pipe; +- +- mem_buf.buf[0].mem = mbuf; +- mem_buf.off = now_len; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res > 0) { +- char *tmpbuf; +- size_t extra_len = res; +- /* +- * Trickiest case: got more data. Need to get +- * back the data from the pipe and then fall +- * back to regular write. 
+- */ +- tmpbuf = malloc(headerlen); +- if (tmpbuf == NULL) { +- free(mbuf); +- res = ENOMEM; +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], tmpbuf, headerlen); +- free(tmpbuf); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], mbuf, now_len); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- len = now_len + extra_len; +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- return res; +- } +- free(mbuf); +- res = now_len; +- } +- len = res; +- out->len = headerlen + len; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i (splice)\n", +- (unsigned long long) out->unique, out->len); +- } +- +- splice_flags = 0; +- if ((flags & FUSE_BUF_SPLICE_MOVE) && +- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) +- splice_flags |= SPLICE_F_MOVE; +- +- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, +- NULL, out->len, splice_flags); +- if (res == -1) { +- res = -errno; +- perror("fuse: splice from pipe"); +- goto clear_pipe; +- } +- if (res != out->len) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", +- res, out->len); +- goto clear_pipe; +- } +- return 0; +- +-clear_pipe: +- fuse_ll_clear_pipe(se); +- return res; +- +-fallback: +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); +-} +-#else + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, unsigned int flags) +@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } +-#endif + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, + if (bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +- goto out; ++ return; + } + bufv.buf[0].size = arg->size; + + se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); +- +-out: +- /* Need to reset the pipe if ->write_buf() didn't consume all data */ +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- unsigned max_read_mo = get_max_read(se->mo); +- if (se->conn.max_read != max_read_mo) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " +- "requested different maximum read size (%u vs %u)\n", +- se->conn.max_read, max_read_mo); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- + if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { + se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; + } +@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, + } + out: + free(rreq); +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +@@ -2496,7 +2113,6 @@ static struct { + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, + [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_LSEEK] = { do_lseek, "LSEEK" }, +- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) +@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) + return fuse_ll_ops[opcode].name; + } + +-static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, +- struct fuse_bufvec *src) +-{ +- ssize_t res = fuse_buf_copy(dst, src, 0); +- if 
(res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); +- return res; +- } +- if ((size_t)res < fuse_buf_size(dst)) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- return -1; +- } +- return 0; +-} +- + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch) + { +- const size_t write_header_size = sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; +- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +- void *mbuf = NULL; + int err; +- int res; +- +- if (buf->flags & FUSE_BUF_IS_FD) { +- if (buf->size < tmpbuf.buf[0].size) +- tmpbuf.buf[0].size = buf->size; + +- mbuf = malloc(tmpbuf.buf[0].size); +- if (mbuf == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); +- goto clear_pipe; +- } +- tmpbuf.buf[0].mem = mbuf; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- if (res < 0) +- goto clear_pipe; +- +- in = mbuf; +- } else { +- in = buf->mem; +- } ++ in = buf->mem; + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, + }; + + fuse_send_msg(se, ch, &iov, 1); +- goto clear_pipe; ++ return; + } + + req->unique = in->unique; + req->ctx.uid = in->uid; + req->ctx.gid = in->gid; + req->ctx.pid = in->pid; +- req->ch = ch ? 
fuse_chan_get(ch) : NULL; ++ req->ch = ch; + + err = EIO; + if (!se->got_init) { +@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + fuse_reply_err(intr, EAGAIN); + } + +- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && +- (in->opcode != FUSE_WRITE || !se->op.write_buf) && +- in->opcode != FUSE_NOTIFY_REPLY) { +- void *newmbuf; +- +- err = ENOMEM; +- newmbuf = realloc(mbuf, buf->size); +- if (newmbuf == NULL) +- goto reply_err; +- mbuf = newmbuf; +- +- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); +- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- err = -res; +- if (res < 0) +- goto reply_err; +- +- in = mbuf; +- } +- + inarg = (void *) &in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) + do_write_buf(req, in->nodeid, inarg, buf); +@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, + else + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + +-out_free: +- free(mbuf); + return; + + reply_err: + fuse_reply_err(req, err); +-clear_pipe: +- if (buf->flags & FUSE_BUF_IS_FD) +- fuse_ll_clear_pipe(se); +- goto out_free; + } + + #define LL_OPTION(n,o,v) \ +@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) + { + printf("using FUSE kernel interface version %i.%i\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); +- fuse_mount_version(); + } + + void fuse_lowlevel_help(void) +@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) + /* These are not all options, but the ones that are + potentially of interest to an end-user */ + printf( +-" -o allow_other allow access by all users\n" + " -o allow_root allow access by root\n" +-" -o auto_unmount auto unmount on process termination\n"); ++); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- struct fuse_ll_pipe *llp; +- + if (se->got_init && !se->got_destroy) { + if (se->op.destroy) + se->op.destroy(se->userdata); + } +- llp = 
pthread_getspecific(se->pipe_key); +- if (llp != NULL) +- fuse_ll_pipe_free(llp); +- pthread_key_delete(se->pipe_key); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) + close(se->fd); +- destroy_mount_opts(se->mo); + free(se); + } + + +-static void fuse_ll_pipe_destructor(void *data) +-{ +- struct fuse_ll_pipe *llp = data; +- fuse_ll_pipe_free(llp); +-} +- +-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) +-{ +- return fuse_session_receive_buf_int(se, buf, NULL); +-} +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- struct fuse_chan *ch) +-{ +- int err; +- ssize_t res; +-#ifdef HAVE_SPLICE +- size_t bufsize = se->bufsize; +- struct fuse_ll_pipe *llp; +- struct fuse_buf tmpbuf; +- +- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- if (llp->size < bufsize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); +- if (res == -1) { +- llp->can_grow = 0; +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < bufsize) +- goto fallback; +- } +- +- res = splice(ch ? 
ch->fd : se->fd, +- NULL, llp->pipe[1], NULL, bufsize, 0); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- +- if (res == -1) { +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- if (err != EINTR && err != EAGAIN) +- perror("fuse: splice from device"); +- return -err; +- } +- +- if (res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); +- return -EIO; +- } +- +- tmpbuf = (struct fuse_buf) { +- .size = res, +- .flags = FUSE_BUF_IS_FD, +- .fd = llp->pipe[0], +- }; +- +- /* +- * Don't bother with zero copy for small requests. +- * fuse_loop_mt() needs to check for FORGET so this more than +- * just an optimization. +- */ +- if (res < sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in) + pagesize) { +- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; +- struct fuse_bufvec dst = { .count = 1 }; +- +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- buf->size = se->bufsize; +- buf->flags = 0; +- dst.buf[0] = *buf; +- +- res = fuse_buf_copy(&dst, &src, 0); +- if (res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", +- strerror(-res)); +- fuse_ll_clear_pipe(se); +- return res; +- } +- if (res < tmpbuf.size) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- fuse_ll_clear_pipe(se); +- return -EIO; +- } +- assert(res == tmpbuf.size); +- +- } else { +- /* Don't overwrite buf->mem, as that would cause a leak */ +- buf->fd = tmpbuf.fd; +- buf->flags = tmpbuf.flags; +- } +- buf->size = tmpbuf.size; +- +- return res; +- +-fallback: +-#endif +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- +-restart: 
+- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- if (res == -1) { +- /* ENOENT means the operation was interrupted, it's safe +- to restart */ +- if (err == ENOENT) +- goto restart; +- +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- /* Errors occurring during normal operation: EINTR (read +- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem +- umounted) */ +- if (err != EINTR && err != EAGAIN) +- perror("fuse: reading device"); +- return -err; +- } +- if ((size_t) res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); +- return -EIO; +- } +- +- buf->size = res; +- +- return res; +-} +- + struct fuse_session *fuse_session_new(struct fuse_args *args, + const struct fuse_lowlevel_ops *op, + size_t op_size, void *userdata) + { +- int err; + struct fuse_session *se; +- struct mount_opts *mo; + + if (sizeof(struct fuse_lowlevel_ops) < op_size) { + fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /* Parse options */ + if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) + goto out2; +- if(se->deny_others) { +- /* Allowing access only by root is done by instructing +- * kernel to allow access by everyone, and then restricting +- * access to root and mountpoint owner in libfuse. +- */ +- // We may be adding the option a second time, but +- // that doesn't hurt. 
+- if(fuse_opt_add_arg(args, "-oallow_other") == -1) +- goto out2; +- } +- mo = parse_mount_opts(args); +- if (mo == NULL) +- goto out3; +- + if(args->argc == 1 && + args->argv[0][0] == '-') { + fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); +- + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + + FUSE_BUFFER_HEADER_SIZE; + +@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + +- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); +- if (err) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", +- strerror(err)); +- goto out5; +- } +- + memcpy(&se->op, op, op_size); + se->owner = getuid(); + se->userdata = userdata; + +- se->mo = mo; + return se; + +-out5: +- pthread_mutex_destroy(&se->lock); + out4: + fuse_opt_free_args(args); +-out3: +- free(mo); + out2: + free(se); + out1: +@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) + + void fuse_session_unmount(struct fuse_session *se) + { +- if (se->mountpoint != NULL) { +- fuse_kern_unmount(se->mountpoint, se->fd); +- free(se->mountpoint); +- se->mountpoint = NULL; +- } + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 18c6363..6b1adfc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -31,10 +31,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Miscellaneous definitions * + * ----------------------------------------------------------- */ +@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); + * ----------------------------------------------------------- */ + + 
struct fuse_cmdline_opts { +- int singlethread; + int foreground; + int debug; + int nodefault_subtype; + char *mountpoint; + int show_version; + int show_help; +- int clone_fd; + unsigned int max_idle_threads; + }; + +@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); + int fuse_session_loop(struct fuse_session *se); + + /** +- * Enter a multi-threaded event loop. +- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * @param se the session +- * @param config session loop configuration +- * @return see fuse_session_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); +-#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) +-#else +-int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); +-#endif +- +-/** + * Flag a session as terminated. 
+ * + * This function is invoked by the POSIX signal handlers, when +@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, + */ + int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index d8573e7..6910255 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -14,10 +14,6 @@ + * This file defines the option parsing interface of FUSE + */ + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Option description + * +@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); + */ + int fuse_opt_match(const struct fuse_opt opts[], const char *opt); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 64ff7ad..5a2e64c 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("-s", singlethread), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +-#ifndef __FreeBSD__ + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +-#endif +- FUSE_HELPER_OPT("clone_fd", clone_fd), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_OPT_END + }; +@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) + " -V --version print version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" +- " -s disable multi-threaded operation\n" +- " -o clone_fd use separate fuse device fd for each thread\n" +- " (may improve performance)\n" + " -o max_idle_threads the maximum number of idle worker threads\n" + " allowed (default: 10)\n"); + 
} +@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, + } + } + +-/* Under FreeBSD, there is no subtype option so this +- function actually sets the fsname */ +-static int add_default_subtype(const char *progname, struct fuse_args *args) +-{ +- int res; +- char *subtype_opt; +- +- const char *basename = strrchr(progname, '/'); +- if (basename == NULL) +- basename = progname; +- else if (basename[1] != '\0') +- basename++; +- +- subtype_opt = (char *) malloc(strlen(basename) + 64); +- if (subtype_opt == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +-#ifdef __FreeBSD__ +- sprintf(subtype_opt, "-ofsname=%s", basename); +-#else +- sprintf(subtype_opt, "-osubtype=%s", basename); +-#endif +- res = fuse_opt_add_arg(args, subtype_opt); +- free(subtype_opt); +- return res; +-} +- + int fuse_parse_cmdline(struct fuse_args *args, + struct fuse_cmdline_opts *opts) + { +@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, + fuse_helper_opt_proc) == -1) + return -1; + +- /* *Linux*: if neither -o subtype nor -o fsname are specified, +- set subtype to program's basename. +- *FreeBSD*: if fsname is not specified, set to program's +- basename. 
*/ +- if (!opts->nodefault_subtype) +- if (add_default_subtype(args->argv[0], args) == -1) +- return -1; +- + return 0; + } + +@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) + return 0; + } + +-int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *user_data) +-{ +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse *fuse; +- struct fuse_cmdline_opts opts; +- int res; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- +- if (opts.show_version) { +- printf("FUSE library version %s\n", PACKAGE_VERSION); +- fuse_lowlevel_version(); +- res = 0; +- goto out1; +- } +- +- if (opts.show_help) { +- if(args.argv[0][0] != '\0') +- printf("usage: %s [options] \n\n", +- args.argv[0]); +- printf("FUSE options:\n"); +- fuse_cmdline_help(); +- fuse_lib_help(&args); +- res = 0; +- goto out1; +- } +- +- if (!opts.show_help && +- !opts.mountpoint) { +- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); +- res = 2; +- goto out1; +- } +- +- +- fuse = fuse_new_31(&args, op, op_size, user_data); +- if (fuse == NULL) { +- res = 3; +- goto out1; +- } +- +- if (fuse_mount(fuse,opts.mountpoint) != 0) { +- res = 4; +- goto out2; +- } +- +- if (fuse_daemonize(opts.foreground) != 0) { +- res = 5; +- goto out3; +- } +- +- struct fuse_session *se = fuse_get_session(fuse); +- if (fuse_set_signal_handlers(se) != 0) { +- res = 6; +- goto out3; +- } +- +- if (opts.singlethread) +- res = fuse_loop(fuse); +- else { +- struct fuse_loop_config loop_config; +- loop_config.clone_fd = opts.clone_fd; +- loop_config.max_idle_threads = opts.max_idle_threads; +- res = fuse_loop_mt_32(fuse, &loop_config); +- } +- if (res) +- res = 7; +- +- fuse_remove_signal_handlers(se); +-out3: +- fuse_unmount(fuse); +-out2: +- fuse_destroy(fuse); +-out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); +- return res; +-} +- +- + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, + struct fuse_conn_info *conn) + 
{ +@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) + } + return opts; + } +- +-int fuse_open_channel(const char *mountpoint, const char* options) +-{ +- struct mount_opts *opts = NULL; +- int fd = -1; +- const char *argv[] = { "", "-o", options }; +- int argc = sizeof(argv) / sizeof(argv[0]); +- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); +- +- opts = parse_mount_opts(&args); +- if (opts == NULL) +- return -1; +- +- fd = fuse_kern_mount(mountpoint, opts); +- destroy_mount_opts(opts); +- +- return fd; +-} +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 6b77c33..7c5f561 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, + res = symlinkat(link, dirfd, path); + } else if (S_ISFIFO(mode)) { + res = mkfifoat(dirfd, path, mode); +-#ifdef __FreeBSD__ +- } else if (S_ISSOCK(mode)) { +- struct sockaddr_un su; +- int fd; +- +- if (strlen(path) >= sizeof(su.sun_path)) { +- errno = ENAMETOOLONG; +- return -1; +- } +- fd = socket(AF_UNIX, SOCK_STREAM, 0); +- if (fd >= 0) { +- /* +- * We must bind the socket to the underlying file +- * system to create the socket file, even though +- * we'll never listen on this socket. 
+- */ +- su.sun_family = AF_UNIX; +- strncpy(su.sun_path, path, sizeof(su.sun_path)); +- res = bindat(dirfd, fd, (struct sockaddr*)&su, +- sizeof(su)); +- if (res == 0) +- close(fd); +- } else { +- res = -1; +- } +-#endif + } else { + res = mknodat(dirfd, path, mode, rdev); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e1a6056..e5f7115 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) + ret = 0; + goto err_out1; + } else if (opts.show_version) { +- printf("FUSE library version %s\n", fuse_pkgversion()); + fuse_lowlevel_version(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch b/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch new file mode 100644 index 0000000..411af77 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch @@ -0,0 +1,545 @@ +From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:51 +0100 +Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-17-dgilbert@redhat.com> +Patchwork-id: 93468 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +virtiofsd only supports major=7, minor>=31; trim out a lot of +old compatibility code. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- + 1 file changed, 119 insertions(+), 211 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 07fb8a6..514d79c 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { + struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(arg); +- +- /* +- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- * negative entry +- */ +- if (!e->ino && req->se->conn.proto_minor < 4) { +- return fuse_reply_err(req, ENOENT); +- } ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + fill_entry(&arg, e); +@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + const struct fuse_file_info *f) + { + char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(struct fuse_entry_out); ++ size_t entrysize = sizeof(struct fuse_entry_out); + struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; + struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, + double attr_timeout) + { + struct fuse_attr_out arg; +- size_t size = +- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + arg.attr_valid = calc_timeout_sec(attr_timeout); +@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { + struct fuse_statfs_out arg; +- size_t size = +- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + convert_statfs(stbuf, &arg.st); +@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, + iov[count].iov_len = sizeof(arg); + count++; + +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } + +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; + } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) { +- goto enomem; +- } + +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; + } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) { +- goto enomem; +- } 
+ +- iov[count].iov_base = (void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; + } + + res = send_reply_iov(req, 0, iov, count); +@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_file_info *fip = NULL; + struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; + } + + if (req->se->op.getattr) { +@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; + char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mknod) { + req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { + struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { + req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + sizeof(struct fuse_open_in); +- 
} ++ req->ctx.umask = arg->umask; + + req->se->op.create(req, nodeid, name, arg->mode, &fi); + } else { +@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; + req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); + } else { + fuse_reply_err(req, ENOSYS); +@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); + } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); + if 
(bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) { +- fi.lock_owner = arg->lock_owner; +- } ++ fi.lock_owner = arg->lock_owner; + + if (req->se->op.flush) { + req->se->op.flush(req, nodeid, &fi); +@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; + fi.lock_owner = arg->lock_owner; +@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { ++ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { + req->ioctl_64bit = 1; + } + +@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.major = FUSE_KERNEL_VERSION; + outarg.minor = FUSE_KERNEL_MINOR_VERSION; + +- if (arg->major < 7) { ++ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { + fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", + arg->major, arg->minor); + fuse_reply_err(req, EPROTO); +@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) { +- se->conn.max_readahead = 
arg->max_readahead; +- } +- if (arg->flags & FUSE_ASYNC_READ) { +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- } +- if (arg->flags & FUSE_POSIX_LOCKS) { +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- } +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- } +- if (arg->flags & FUSE_EXPORT_SUPPORT) { +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- } +- if (arg->flags & FUSE_DONT_MASK) { +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- } +- if (arg->flags & FUSE_FLOCK_LOCKS) { +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- } +- if (arg->flags & FUSE_AUTO_INVAL_DATA) { +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- } +- if (arg->flags & FUSE_DO_READDIRPLUS) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- } +- if (arg->flags & FUSE_READDIRPLUS_AUTO) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- } +- if (arg->flags & FUSE_ASYNC_DIO) { +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- } +- if (arg->flags & FUSE_WRITEBACK_CACHE) { +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- } +- if (arg->flags & FUSE_PARALLEL_DIROPS) { +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- } +- if (arg->flags & FUSE_POSIX_ACL) { +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- } +- if (arg->flags & FUSE_HANDLE_KILLPRIV) { +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- } +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- } +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; + } +- } else { +- se->conn.max_readahead = 0; + } +- +- if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) { +- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; +- } ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; + + /* + * Default settings for modern filesystems. +@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + outarg.max_readahead = se->conn.max_readahead; + outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) { +- se->conn.max_background = (1 << 16) - 1; +- } +- if (se->conn.congestion_threshold > se->conn.max_background) { +- se->conn.congestion_threshold = se->conn.max_background; +- } +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; + } +- if (se->conn.proto_minor >= 23) { +- outarg.time_gran = se->conn.time_gran; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; + } + ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ outarg.time_gran = se->conn.time_gran; ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, + outarg.minor); +@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.congestion_threshold); + fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + } +- if (arg->minor < 5) { +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- } else if (arg->minor < 23) { +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- } + + send_reply_ok(req, &outarg, outargsize); + } +@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + return 
-EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.ino = ino; + outarg.off = off; + outarg.len = len; +@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.namelen = namelen; + outarg.padding = 0; +@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.child = child; + outarg.namelen = namelen; +@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- + out.unique = 0; + out.error = FUSE_NOTIFY_STORE; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch new file mode 100644 index 0000000..a0882d5 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch @@ -0,0 +1,93 @@ +From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:17 +0100 +Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-103-dgilbert@redhat.com> +Patchwork-id: 93557 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +Define fuse_buf_writev() which uses pwritev and writev to improve io +bandwidth. Especially, the src bufs with 0 size should be skipped as +their mems are not *block_size* aligned which will cause writev to fail +in direct io mode. + +Signed-off-by: Jun Piao +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 42a608f..37befeb 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + #include + #include ++#include + #include + #include + +@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + ++__attribute__((unused)) ++static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, ++ struct fuse_bufvec *in_buf) ++{ ++ ssize_t res, i, j; ++ size_t iovcnt = in_buf->count; ++ struct iovec *iov; ++ int fd = out_buf->fd; ++ ++ iov = calloc(iovcnt, sizeof(struct iovec)); ++ if (!iov) { ++ return -ENOMEM; ++ } ++ ++ for (i = 0, j = 0; i < iovcnt; i++) { ++ /* Skip the buf with 0 size */ ++ if (in_buf->buf[i].size) { ++ iov[j].iov_base = in_buf->buf[i].mem; ++ iov[j].iov_len = in_buf->buf[i].size; ++ j++; ++ } ++ } ++ ++ if (out_buf->flags & FUSE_BUF_FD_SEEK) { ++ res = pwritev(fd, iov, iovcnt, out_buf->pos); ++ } else { ++ res = writev(fd, iov, iovcnt); ++ } ++ ++ if (res == -1) { ++ res = -errno; ++ } ++ ++
free(iov); ++ return res; ++} ++ + static size_t min_size(size_t s1, size_t s2) + { + return s1 < s2 ? s1 : s2; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch new file mode 100644 index 0000000..451f12b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch @@ -0,0 +1,170 @@ +From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:07 +0100 +Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-33-dgilbert@redhat.com> +Patchwork-id: 93487 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Although --socket-path=PATH is useful for manual invocations, management +tools typically create the UNIX domain socket themselves and pass it to +the vhost-user device backend. This way QEMU can be launched +immediately with a valid socket. No waiting for the vhost-user device +backend is required when fd passing is used. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- + tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ + 3 files changed, 38 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1126723..45995f3 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -68,6 +68,7 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; + }; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 4f4684d..95f4db8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + LL_OPTION("--debug", debug, 1), + LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), ++ LL_OPTION("--fd=%d", vu_listen_fd, 0), + FUSE_OPT_END + }; + +@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) + */ + printf( + " -o allow_root allow access by root\n" +- " --socket-path=PATH path for the vhost-user socket\n"); ++ " --socket-path=PATH path for the vhost-user socket\n" ++ " --fd=FDNUM fd number of vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out1; + } + se->fd = -1; ++ se->vu_listen_fd = -1; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (!se->vu_socket_path) { +- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ if (!se->vu_socket_path && se->vu_listen_fd < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd 
option\n"); ++ goto out4; ++ } ++ if (se->vu_socket_path && se->vu_listen_fd >= 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: --socket-path and --fd cannot be given together\n"); + goto out4; + } + +@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) + + int fuse_lowlevel_is_virtio(struct fuse_session *se) + { +- return se->vu_socket_path != NULL; ++ return !!se->virtio_dev; + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7e2711b..635f877 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + +-int virtio_session_mount(struct fuse_session *se) ++static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; + mode_t old_umask; + ++ /* Nothing to do if fd is already initialized */ ++ if (se->vu_listen_fd >= 0) { ++ return 0; ++ } ++ + if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { + fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); + return -1; + } + +- se->fd = -1; +- + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + ++ se->vu_listen_fd = listen_sock; ++ return 0; ++} ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ int ret; ++ ++ ret = fv_create_listen_socket(se); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ se->fd = -1; ++ + fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", + __func__); +- int data_sock = accept(listen_sock, NULL, NULL); ++ int data_sock = accept(se->vu_listen_fd, NULL, NULL); + if (data_sock == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); +- close(listen_sock); ++ close(se->vu_listen_fd); + return -1; + } +- close(listen_sock); ++ close(se->vu_listen_fd); ++ se->vu_listen_fd = -1; + fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket 
connection\n", + __func__); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch new file mode 100644 index 0000000..b874dc9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch @@ -0,0 +1,134 @@ +From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:21 +0100 +Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-47-dgilbert@redhat.com> +Patchwork-id: 93502 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce an API for consuming bytes from a buffer with size checks. +All FUSE operations will be converted to use this safe API instead of +void *inarg. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 76 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 772efa9..42a608f 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + + return copied; + } ++ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) ++{ ++ void *ptr; ++ ++ if (len > iter->size - iter->pos) { ++ return NULL; ++ } ++ ++ ptr = iter->mem + iter->pos; ++ iter->pos += len; ++ return ptr; ++} ++ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) ++{ ++ const char *str = iter->mem + iter->pos; ++ size_t remaining = iter->size - iter->pos; ++ size_t i; ++ ++ for (i = 0; i < remaining; i++) { ++ if (str[i] == '\0') { ++ iter->pos += i + 1; ++ return str; ++ } ++ } ++ return NULL; ++} +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 0cb33ac..f8f6433 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + ++/** ++ * Memory buffer iterator ++ * ++ */ ++struct fuse_mbuf_iter { ++ /** ++ * Data pointer ++ */ ++ void *mem; ++ ++ /** ++ * Total length, in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Offset from start of buffer ++ */ ++ size_t pos; ++}; ++ ++/* Initialize memory buffer iterator from a fuse_buf */ ++#define FUSE_MBUF_ITER_INIT(fbuf) \ ++ ((struct fuse_mbuf_iter){ \ ++ .mem = fbuf->mem, \ ++ .size = fbuf->size, \ ++ .pos = 0, \ ++ }) ++ ++/** ++ * Consume bytes from a memory buffer iterator ++ * ++ * 
@param iter memory buffer iterator ++ * @param len number of bytes to consume ++ * @return pointer to start of consumed bytes or ++ * NULL if advancing beyond end of buffer ++ */ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); ++ ++/** ++ * Consume a NUL-terminated string from a memory buffer iterator ++ * ++ * @param iter memory buffer iterator ++ * @return pointer to the string or ++ * NULL if advancing beyond end of buffer or there is no NUL-terminator ++ */ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); ++ + /* + * Signal handling + */ +- + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch new file mode 100644 index 0000000..bdef115 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch @@ -0,0 +1,88 @@ +From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:58 +0100 +Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-84-dgilbert@redhat.com> +Patchwork-id: 93538 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This offers a helper function for lo_data's cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr.
David Alan Gilbert +(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 056ebe8..e8dc5c7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) + return la->ino == lb->ino && la->dev == lb->dev; + } + ++static void fuse_lo_data_cleanup(struct lo_data *lo) ++{ ++ if (lo->inodes) { ++ g_hash_table_destroy(lo->inodes); ++ } ++ lo_map_destroy(&lo->fd_map); ++ lo_map_destroy(&lo->dirp_map); ++ lo_map_destroy(&lo->ino_map); ++ ++ if (lo->proc_self_fd >= 0) { ++ close(lo->proc_self_fd); ++ } ++ ++ if (lo->root.fd >= 0) { ++ close(lo->root.fd); ++ } ++ ++ free(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2554,22 +2574,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + +- if (lo.inodes) { +- g_hash_table_destroy(lo.inodes); +- } +- lo_map_destroy(&lo.fd_map); +- lo_map_destroy(&lo.dirp_map); +- lo_map_destroy(&lo.ino_map); +- +- if (lo.proc_self_fd >= 0) { +- close(lo.proc_self_fd); +- } +- +- if (lo.root.fd >= 0) { +- close(lo.root.fd); +- } +- +- free(lo.source); ++ fuse_lo_data_cleanup(&lo); + + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch new file mode 100644 index 0000000..5e81663 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch @@ -0,0 +1,46 @@ +From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:55 +0100 +Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-21-dgilbert@redhat.com> +Patchwork-id: 93474 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The -o source=PATH option will be used by most command-line invocations. +Let's document it! + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 26ac870..fc9b264 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); ++ printf(" -o source=PATH shared directory tree\n"); + fuse_lowlevel_help(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch b/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch new file mode 100644 index 0000000..b57e408 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch @@ -0,0 +1,121 @@ +From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:10 +0100 +Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-36-dgilbert@redhat.com> +Patchwork-id: 93486 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add the --print-capabilities option as per vhost-user.rst "Backend +programs conventions". Currently there are no advertised features. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) +Signed-off-by: Miroslav Rezanina +--- + docs/interop/vhost-user.json | 4 +++- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ + 4 files changed, 18 insertions(+), 1 deletion(-) + +diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json +index da6aaf5..d4ea1f7 100644 +--- a/docs/interop/vhost-user.json ++++ b/docs/interop/vhost-user.json +@@ -31,6 +31,7 @@ + # @rproc-serial: virtio remoteproc serial link + # @scsi: virtio scsi + # @vsock: virtio vsock transport ++# @fs: virtio fs (since 4.2) + # + # Since: 4.0 + ## +@@ -50,7 +51,8 @@ + 'rpmsg', + 'rproc-serial', + 'scsi', +- 'vsock' ++ 'vsock', ++ 'fs' + ] + } + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f6b3470..0d61df8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { + int nodefault_subtype; + int show_version; + int show_help; ++ int print_capabilities; + unsigned int max_idle_threads; + }; + +diff --git 
a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index a3645fc..b8ec5ac 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("--help", show_help), + FUSE_HELPER_OPT("-V", show_version), + FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), + FUSE_HELPER_OPT("-d", debug), + FUSE_HELPER_OPT("debug", debug), + FUSE_HELPER_OPT("-d", foreground), +@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) + { + printf(" -h --help print help\n" + " -V --version print version\n" ++ " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" + " --daemonize run in background\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 037c5d7..cd27c09 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { + .lseek = lo_lseek, + }; + ++/* Print vhost-user.json backend program capabilities */ ++static void print_capabilities(void) ++{ ++ printf("{\n"); ++ printf(" \"type\": \"fs\"\n"); ++ printf("}\n"); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) + fuse_lowlevel_version(); + ret = 0; + goto err_out1; ++ } else if (opts.print_capabilities) { ++ print_capabilities(); ++ ret = 0; ++ goto err_out1; + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch b/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch new file mode 100644 index 0000000..a6a9cc9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch @@ -0,0 +1,164 @@ +From 555ec3463b3dbfd6e08eac7840419d176f113e46 Mon Sep 17 00:00:00 2001 
+From: "Dr. David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:55 +0100 +Subject: [PATCH 4/9] virtiofsd: add --rlimit-nofile=NUM option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200505163600.22956-3-dgilbert@redhat.com> +Patchwork-id: 96270 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/7] virtiofsd: add --rlimit-nofile=NUM option +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Max Reitz +RH-Acked-by: Michael S. Tsirkin + +From: Stefan Hajnoczi + +Make it possible to specify the RLIMIT_NOFILE on the command-line. +Users running multiple virtiofsd processes should allocate a certain +number to each process so that the system-wide limit can never be +exhausted. + +When this option is set to 0 the rlimit is left at its current value. +This is useful when a management tool wants to configure the rlimit +itself. + +The default behavior remains unchanged: try to set the limit to +1,000,000 file descriptors if the current rlimit is lower. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20200501140644.220940-2-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 6dbb716877728ce4eb51619885ef6ef4ada9565f) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 23 +++++++++++++++++++++++ + tools/virtiofsd/passthrough_ll.c | 22 ++++++++-------------- + 3 files changed, 32 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 8f6d705..562fd52 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1777,6 +1777,7 @@ struct fuse_cmdline_opts { + int syslog; + int log_level; + unsigned int max_idle_threads; ++ unsigned long rlimit_nofile; + }; + + /** +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 0801cf7..9b3eddc 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -23,6 +23,8 @@ + #include + #include + #include ++#include ++#include + #include + + #define FUSE_HELPER_OPT(t, p) \ +@@ -53,6 +55,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_HELPER_OPT("--rlimit-nofile=%lu", rlimit_nofile), + FUSE_HELPER_OPT("--syslog", syslog), + FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), + FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), +@@ -171,6 +174,9 @@ void fuse_cmdline_help(void) + " default: no_writeback\n" + " -o xattr|no_xattr enable/disable xattr\n" + " default: no_xattr\n" ++ " --rlimit-nofile= set maximum number of file descriptors\n" ++ " (0 leaves rlimit unchanged)\n" ++ " default: 1,000,000 if the current rlimit is lower\n" + ); + } + +@@ -191,11 +197,28 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, + } + } + ++static unsigned long get_default_rlimit_nofile(void) ++{ ++ rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */ ++ struct rlimit rlim; ++ ++ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): 
%m\n"); ++ exit(1); ++ } ++ ++ if (rlim.rlim_cur >= max_fds) { ++ return 0; /* we have more fds available than required! */ ++ } ++ return max_fds; ++} ++ + int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + { + memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + + opts->max_idle_threads = 10; ++ opts->rlimit_nofile = get_default_rlimit_nofile(); + opts->foreground = 1; + + if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 50ff672..184ad0f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2711,24 +2711,18 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, + setup_seccomp(enable_syslog); + } + +-/* Raise the maximum number of open file descriptors */ +-static void setup_nofile_rlimit(void) ++/* Set the maximum number of open file descriptors */ ++static void setup_nofile_rlimit(unsigned long rlimit_nofile) + { +- const rlim_t max_fds = 1000000; +- struct rlimit rlim; +- +- if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { +- fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); +- exit(1); +- } ++ struct rlimit rlim = { ++ .rlim_cur = rlimit_nofile, ++ .rlim_max = rlimit_nofile, ++ }; + +- if (rlim.rlim_cur >= max_fds) { ++ if (rlimit_nofile == 0) { + return; /* nothing to do */ + } + +- rlim.rlim_cur = max_fds; +- rlim.rlim_max = max_fds; +- + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { + /* Ignore SELinux denials */ + if (errno == EPERM) { +@@ -2981,7 +2975,7 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + +- setup_nofile_rlimit(); ++ setup_nofile_rlimit(opts.rlimit_nofile); + + /* Must be before sandbox since it wants /proc */ + setup_capng(); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch b/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch new file mode 100644 index 0000000..b34108e --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch @@ -0,0 +1,285 @@ +From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:30 +0100 +Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-56-dgilbert@redhat.com> +Patchwork-id: 93511 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Only allow system calls that are needed by virtiofsd. All other system +calls cause SIGSYS to be directed at the thread and the process will +coredump. + +Restricting system calls reduces the kernel attack surface and limits +what the process can do when compromised. + +Signed-off-by: Stefan Hajnoczi +with additional entries by: +Signed-off-by: Ganesh Maharaj Mahalingam +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Misono Tomohiro +Signed-off-by: piaojun +Signed-off-by: Vivek Goyal +Signed-off-by: Eric Ren +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) + +Signed-off-by: Miroslav Rezanina +--- + Makefile | 5 +- + tools/virtiofsd/Makefile.objs | 5 +- + tools/virtiofsd/passthrough_ll.c | 2 + + tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/seccomp.h | 14 ++++ + 5 files changed, 174 insertions(+), 3 deletions(-) + create mode 100644 tools/virtiofsd/seccomp.c + create mode 100644 tools/virtiofsd/seccomp.h + +diff --git a/Makefile b/Makefile +index 0e9755d..6879a06 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifdef CONFIG_LINUX ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + +-ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++# relies on Linux-specific syscalls ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +index 45a8075..076f667 100644 +--- a/tools/virtiofsd/Makefile.objs ++++ b/tools/virtiofsd/Makefile.objs +@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ + fuse_signals.o \ + fuse_virtio.o \ + helper.o \ +- passthrough_ll.o ++ passthrough_ll.o \ ++ seccomp.o + ++seccomp.o-cflags := $(SECCOMP_CFLAGS) ++seccomp.o-libs := $(SECCOMP_LIBS) +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0947d14..bd8925b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -59,6 +59,7 @@ + #include + + #include "passthrough_helpers.h" ++#include "seccomp.h" + + struct lo_map_elem { + union { +@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct 
fuse_session *se) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); ++ setup_seccomp(); + } + + int main(int argc, char *argv[]) +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +new file mode 100644 +index 0000000..691fb63 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.c +@@ -0,0 +1,151 @@ ++/* ++ * Seccomp sandboxing for virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "seccomp.h" ++#include "fuse_i.h" ++#include "fuse_log.h" ++#include ++#include ++#include ++#include ++ ++/* Bodge for libseccomp 2.4.2 which broke ppoll */ ++#if !defined(__SNR_ppoll) && defined(__SNR_brk) ++#ifdef __NR_ppoll ++#define __SNR_ppoll __NR_ppoll ++#else ++#define __SNR_ppoll __PNR_ppoll ++#endif ++#endif ++ ++static const int syscall_whitelist[] = { ++ /* TODO ireg sem*() syscalls */ ++ SCMP_SYS(brk), ++ SCMP_SYS(capget), /* For CAP_FSETID */ ++ SCMP_SYS(capset), ++ SCMP_SYS(clock_gettime), ++ SCMP_SYS(clone), ++#ifdef __NR_clone3 ++ SCMP_SYS(clone3), ++#endif ++ SCMP_SYS(close), ++ SCMP_SYS(copy_file_range), ++ SCMP_SYS(dup), ++ SCMP_SYS(eventfd2), ++ SCMP_SYS(exit), ++ SCMP_SYS(exit_group), ++ SCMP_SYS(fallocate), ++ SCMP_SYS(fchmodat), ++ SCMP_SYS(fchownat), ++ SCMP_SYS(fcntl), ++ SCMP_SYS(fdatasync), ++ SCMP_SYS(fgetxattr), ++ SCMP_SYS(flistxattr), ++ SCMP_SYS(flock), ++ SCMP_SYS(fremovexattr), ++ SCMP_SYS(fsetxattr), ++ SCMP_SYS(fstat), ++ SCMP_SYS(fstatfs), ++ SCMP_SYS(fsync), ++ SCMP_SYS(ftruncate), ++ SCMP_SYS(futex), ++ SCMP_SYS(getdents), ++ SCMP_SYS(getdents64), ++ SCMP_SYS(getegid), ++ SCMP_SYS(geteuid), ++ SCMP_SYS(getpid), ++ SCMP_SYS(gettid), ++ SCMP_SYS(gettimeofday), ++ SCMP_SYS(linkat), ++ SCMP_SYS(lseek), ++ SCMP_SYS(madvise), ++ SCMP_SYS(mkdirat), ++ SCMP_SYS(mknodat), ++ SCMP_SYS(mmap), ++ SCMP_SYS(mprotect), ++ SCMP_SYS(mremap), ++ SCMP_SYS(munmap), ++ SCMP_SYS(newfstatat), ++ SCMP_SYS(open), ++ SCMP_SYS(openat), ++ SCMP_SYS(ppoll), 
++ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ ++ SCMP_SYS(preadv), ++ SCMP_SYS(pread64), ++ SCMP_SYS(pwritev), ++ SCMP_SYS(pwrite64), ++ SCMP_SYS(read), ++ SCMP_SYS(readlinkat), ++ SCMP_SYS(recvmsg), ++ SCMP_SYS(renameat), ++ SCMP_SYS(renameat2), ++ SCMP_SYS(rt_sigaction), ++ SCMP_SYS(rt_sigprocmask), ++ SCMP_SYS(rt_sigreturn), ++ SCMP_SYS(sendmsg), ++ SCMP_SYS(setresgid), ++ SCMP_SYS(setresuid), ++#ifdef __NR_setresgid32 ++ SCMP_SYS(setresgid32), ++#endif ++#ifdef __NR_setresuid32 ++ SCMP_SYS(setresuid32), ++#endif ++ SCMP_SYS(set_robust_list), ++ SCMP_SYS(symlinkat), ++ SCMP_SYS(time), /* Rarely needed, except on static builds */ ++ SCMP_SYS(tgkill), ++ SCMP_SYS(unlinkat), ++ SCMP_SYS(utimensat), ++ SCMP_SYS(write), ++ SCMP_SYS(writev), ++}; ++ ++void setup_seccomp(void) ++{ ++ scmp_filter_ctx ctx; ++ size_t i; ++ ++#ifdef SCMP_ACT_KILL_PROCESS ++ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); ++ /* Handle a newer libseccomp but an older kernel */ ++ if (!ctx && errno == EOPNOTSUPP) { ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++ } ++#else ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++#endif ++ if (!ctx) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); ++ exit(1); ++ } ++ ++ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, ++ syscall_whitelist[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", ++ syscall_whitelist[i]); ++ exit(1); ++ } ++ } ++ ++ /* libvhost-user calls this for post-copy migration, we don't need it */ ++ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), ++ SCMP_SYS(userfaultfd), 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); ++ exit(1); ++ } ++ ++ if (seccomp_load(ctx) < 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); ++ exit(1); ++ } ++ ++ seccomp_release(ctx); ++} +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +new file mode 100644 +index 0000000..86bce72 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.h 
+@@ -0,0 +1,14 @@ ++/* ++ * Seccomp sandboxing for virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef VIRTIOFSD_SECCOMP_H ++#define VIRTIOFSD_SECCOMP_H ++ ++void setup_seccomp(void); ++ ++#endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch new file mode 100644 index 0000000..ac6dc54 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch @@ -0,0 +1,74 @@ +From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:26 +0100 +Subject: [PATCH 115/116] virtiofsd: add some options to the help message +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-112-dgilbert@redhat.com> +Patchwork-id: 93565 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +Add following options to the help message: +- cache +- flock|no_flock +- norace +- posix_lock|no_posix_lock +- readdirplus|no_readdirplus +- timeout +- writeback|no_writeback +- xattr|no_xattr + +Signed-off-by: Masayoshi Mizuma + +dgilbert: Split cache, norace, posix_lock, readdirplus off + into our own earlier patches that added the options + +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index f98d8f2..0801cf7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) + " -o cache= cache mode. could be one of \"auto, " + "always, none\"\n" + " default: auto\n" ++ " -o flock|no_flock enable/disable flock\n" ++ " default: no_flock\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) + " enable/disable readirplus\n" + " default: readdirplus except with " + "cache=none\n" +- ); ++ " -o timeout= I/O timeout (second)\n" ++ " default: depends on cache= option.\n" ++ " -o writeback|no_writeback enable/disable writeback cache\n" ++ " default: no_writeback\n" ++ " -o xattr|no_xattr enable/disable xattr\n" ++ " default: no_xattr\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch b/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch new file mode 100644 index 0000000..5b55342 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch @@ -0,0 +1,239 @@ +From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:36 +0100 +Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-62-dgilbert@redhat.com> +Patchwork-id: 93509 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sometimes collecting output from stderr is inconvenient or does not fit +within the overall logging architecture. Add syslog(3) support for +cases where stderr cannot be used. + +Signed-off-by: Stefan Hajnoczi +dgilbert: Reworked as a logging function +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- + tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- + tools/virtiofsd/seccomp.h | 4 +++- + 5 files changed, 76 insertions(+), 13 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 0d61df8..f2750bc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { + int show_version; + int show_help; + int print_capabilities; ++ int syslog; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5531425..9692ef9 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_HELPER_OPT("--syslog", syslog), + FUSE_OPT_END + }; + +@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) + " -V --version print 
version\n" + " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" ++ " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c281d81..0372aca 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + #include + + #include "passthrough_helpers.h" +@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { + { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; ++static bool use_syslog = false; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, ++ bool enable_syslog) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); +- setup_seccomp(); ++ setup_seccomp(enable_syslog); + } + + /* Raise the maximum number of open file descriptors */ +@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) + } + } + ++static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) ++{ ++ if (use_syslog) { ++ int priority = LOG_ERR; ++ switch (level) { ++ case FUSE_LOG_EMERG: ++ priority = LOG_EMERG; ++ break; ++ case FUSE_LOG_ALERT: ++ priority = LOG_ALERT; ++ break; ++ case FUSE_LOG_CRIT: ++ priority = LOG_CRIT; ++ break; ++ case FUSE_LOG_ERR: ++ priority = LOG_ERR; ++ break; ++ case FUSE_LOG_WARNING: ++ priority = LOG_WARNING; ++ break; ++ case FUSE_LOG_NOTICE: ++ priority = LOG_NOTICE; ++ break; ++ case FUSE_LOG_INFO: ++ priority = LOG_INFO; ++ break; ++ case FUSE_LOG_DEBUG: ++ priority = LOG_DEBUG; ++ break; ++ } ++ vsyslog(priority, fmt, ap); ++ } else { ++ vfprintf(stderr, fmt, ap); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } ++ fuse_set_log_func(log_func); ++ use_syslog = opts.syslog; ++ if (use_syslog) { ++ openlog("virtiofsd", LOG_PID, LOG_DAEMON); ++ } + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +- setup_sandbox(&lo, se); ++ setup_sandbox(&lo, se, opts.syslog); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 691fb63..2d9d4a7 100644 +--- a/tools/virtiofsd/seccomp.c ++++ 
b/tools/virtiofsd/seccomp.c +@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { + SCMP_SYS(writev), + }; + +-void setup_seccomp(void) ++/* Syscalls used when --syslog is enabled */ ++static const int syscall_whitelist_syslog[] = { ++ SCMP_SYS(sendto), ++}; ++ ++static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) + { +- scmp_filter_ctx ctx; + size_t i; + ++ for (i = 0; i < len; i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", ++ syscalls[i]); ++ exit(1); ++ } ++ } ++} ++ ++void setup_seccomp(bool enable_syslog) ++{ ++ scmp_filter_ctx ctx; ++ + #ifdef SCMP_ACT_KILL_PROCESS + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); + /* Handle a newer libseccomp but an older kernel */ +@@ -126,13 +143,10 @@ void setup_seccomp(void) + exit(1); + } + +- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { +- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, +- syscall_whitelist[i], 0) != 0) { +- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", +- syscall_whitelist[i]); +- exit(1); +- } ++ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); ++ if (enable_syslog) { ++ add_whitelist(ctx, syscall_whitelist_syslog, ++ G_N_ELEMENTS(syscall_whitelist_syslog)); + } + + /* libvhost-user calls this for post-copy migration, we don't need it */ +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +index 86bce72..d47c8ea 100644 +--- a/tools/virtiofsd/seccomp.h ++++ b/tools/virtiofsd/seccomp.h +@@ -9,6 +9,8 @@ + #ifndef VIRTIOFSD_SECCOMP_H + #define VIRTIOFSD_SECCOMP_H + +-void setup_seccomp(void); ++#include ++ ++void setup_seccomp(bool enable_syslog); + + #endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch new file mode 100644 index 0000000..0241a9d --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch @@ -0,0 +1,106 @@ +From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:22 +0100 +Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-108-dgilbert@redhat.com> +Patchwork-id: 93561 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add an option to control the size of the thread pool. Requests are now +processed in parallel by default. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- + tools/virtiofsd/fuse_virtio.c | 5 +++-- + 3 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1447d86..4e47e58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -72,6 +72,7 @@ struct fuse_session { + int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; ++ int thread_pool_size; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 79a4031..de2e2e0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -28,6 +28,7 @@ + #include + #include + ++#define THREAD_POOL_SIZE 64 + + #define OFFSET_MAX 0x7fffffffffffffffLL + +@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + 
LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), + LL_OPTION("--fd=%d", vu_listen_fd, 0), ++ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), + FUSE_OPT_END + }; + +@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) + printf( + " -o allow_root allow access by root\n" + " --socket-path=PATH path for the vhost-user socket\n" +- " --fd=FDNUM fd number of vhost-user socket\n"); ++ " --fd=FDNUM fd number of vhost-user socket\n" ++ " --thread-pool-size=NUM thread pool size limit (default %d)\n", ++ THREAD_POOL_SIZE); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + } + se->fd = -1; + se->vu_listen_fd = -1; ++ se->thread_pool_size = THREAD_POOL_SIZE; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 0dcf2ef..9f65823 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ struct fuse_session *se = qi->virtio_dev->se; + GThreadPool *pool; + +- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, +- TRUE, NULL); ++ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, ++ NULL); + if (!pool) { + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); + return NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch b/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch new file mode 100644 index 0000000..a24b24f --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch @@ -0,0 +1,73 @@ +From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:09 +0100 +Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-35-dgilbert@redhat.com> +Patchwork-id: 93490 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Install a vhost-user.json file describing virtiofsd. This allows +libvirt and other management tools to enumerate vhost-user backend +programs. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) +Signed-off-by: Miroslav Rezanina +--- + .gitignore | 1 + + Makefile | 1 + + tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ + 3 files changed, 7 insertions(+) + create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in + +diff --git a/.gitignore b/.gitignore +index aefad32..d7a4f99 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -6,6 +6,7 @@ + /config-target.* + /config.status + /config-temp ++/tools/virtiofsd/50-qemu-virtiofsd.json + /elf2dmp + /trace-events-all + /trace/generated-events.h +diff --git a/Makefile b/Makefile +index 1526775..0e9755d 100644 +--- a/Makefile ++++ b/Makefile +@@ -332,6 +332,7 @@ endif + + ifdef CONFIG_LINUX + HELPERS-y += virtiofsd$(EXESUF) ++vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif + + # Sphinx does not allow building manuals into the same directory as +diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in +new file mode 100644 +index 0000000..9bcd86f +--- /dev/null ++++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in +@@ -0,0 +1,5 @@ ++{ ++ "description": "QEMU virtiofsd vhost-user-fs", ++ 
"type": "fs", ++ "binary": "@libexecdir@/virtiofsd" ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch b/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch new file mode 100644 index 0000000..305745d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch @@ -0,0 +1,175 @@ +From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:32 +0100 +Subject: [PATCH 061/116] virtiofsd: cap-ng helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-58-dgilbert@redhat.com> +Patchwork-id: 93512 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's +sandboxing doesn't have /proc mounted; thus we have to do the +caps read before we sandbox it and save/restore the state. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 4 +-- + tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 74 insertions(+), 2 deletions(-) + +diff --git a/Makefile b/Makefile +index 6879a06..ff05c30 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + + # relies on Linux-specific syscalls +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index bd8925b..97e7c75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -39,6 +39,7 @@ + #include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include ++#include + #include + #include + #include +@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + ++static struct { ++ pthread_mutex_t mutex; ++ void *saved; ++} cap; ++/* That we loaded cap-ng in the current thread from the saved */ ++static __thread bool cap_loaded = 0; ++ + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + + static int is_dot_or_dotdot(const char *name) +@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++/* ++ * Load capng's state from our saved state if the current thread ++ * hadn't previously been loaded. 
++ * returns 0 on success ++ */ ++static int load_capng(void) ++{ ++ if (!cap_loaded) { ++ pthread_mutex_lock(&cap.mutex); ++ capng_restore_state(&cap.saved); ++ /* ++ * restore_state free's the saved copy ++ * so make another. ++ */ ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); ++ return -EINVAL; ++ } ++ pthread_mutex_unlock(&cap.mutex); ++ ++ /* ++ * We want to use the loaded state for our pid, ++ * not the original ++ */ ++ capng_setpid(syscall(SYS_gettid)); ++ cap_loaded = true; ++ } ++ return 0; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + } + + /* ++ * Capture the capability state, we'll need to restore this for individual ++ * threads later; see load_capng. ++ */ ++static void setup_capng(void) ++{ ++ /* Note this accesses /proc so has to happen before the sandbox */ ++ if (capng_get_caps_process()) { ++ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); ++ exit(1); ++ } ++ pthread_mutex_init(&cap.mutex, NULL); ++ pthread_mutex_lock(&cap.mutex); ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); ++ exit(1); ++ } ++ pthread_mutex_unlock(&cap.mutex); ++} ++ ++static void cleanup_capng(void) ++{ ++ free(cap.saved); ++ cap.saved = NULL; ++ pthread_mutex_destroy(&cap.mutex); ++} ++ ++ ++/* + * Make the source directory our root so symlinks cannot escape and no other + * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
+ */ +@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be before sandbox since it wants /proc */ ++ setup_capng(); ++ + setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + + fuse_session_unmount(se); ++ cleanup_capng(); + err_out3: + fuse_remove_signal_handlers(se); + err_out2: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch new file mode 100644 index 0000000..caa4560 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch @@ -0,0 +1,1111 @@ +From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:23 +0100 +Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c + ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-49-dgilbert@redhat.com> +Patchwork-id: 93503 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Each FUSE operation involves parsing the input buffer. Currently the +code assumes the input buffer is large enough for the expected +arguments. This patch uses fuse_mbuf_iter to check the size. + +Most operations are simple to convert. Some are more complicated due to +variable-length inputs or different sizes depending on the protocol +version. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- + 1 file changed, 456 insertions(+), 125 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 611e8b0..02e1d83 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -27,7 +28,6 @@ + #include + + +-#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + + struct fuse_pollhandle { +@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) + return send_reply_ok(req, &arg, sizeof(arg)); + } + +-static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.lookup) { + req->se->op.lookup(req, nodeid, name); +@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; ++ struct fuse_forget_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.forget) { + req->se->op.forget(req, nodeid, arg->nlookup); +@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void 
*inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_batch_forget_in *arg = (void *)inarg; +- struct fuse_forget_one *param = (void *)PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg; ++ struct fuse_forget_data *forgets; ++ size_t scount; + + (void)nodeid; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ /* ++ * Prevent integer overflow. The compiler emits the following warning ++ * unless we use the scount local variable: ++ * ++ * error: comparison is always false due to limited range of data type ++ * [-Werror=type-limits] ++ * ++ * This may be true on 64-bit hosts but we need this check for 32-bit ++ * hosts. ++ */ ++ scount = arg->count; ++ if (scount > SIZE_MAX / sizeof(forgets[0])) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); ++ if (!forgets) { ++ fuse_reply_none(req); ++ return; ++ } ++ + if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *)param); ++ req->se->op.forget_multi(req, arg->count, forgets); + } else if (req->se->op.forget) { ++ unsigned int i; ++ + for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = &param[i]; + struct fuse_req *dummy_req; + + dummy_req = fuse_ll_alloc_req(req->se); +@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + dummy_req->ctx = req->ctx; + dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); + } + fuse_reply_none(req); + } else { +@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_file_info *fip = NULL; + struct
fuse_file_info fi; + +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (arg->getattr_flags & FUSE_GETATTR_FH) { + memset(&fi, 0, sizeof(fi)); +@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; +- + if (req->se->op.setattr) { ++ struct fuse_setattr_in *arg; + struct fuse_file_info *fi = NULL; + struct fuse_file_info fi_store; + struct stat stbuf; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&stbuf, 0, sizeof(stbuf)); + convert_attr(arg, &stbuf); + if (arg->valid & FATTR_FH) { +@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; ++ struct fuse_access_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.access) { + req->se->op.access(req, nodeid, arg->mask); +@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- (void)inarg; ++ (void)iter; + + if (req->se->op.readlink) { + req->se->op.readlink(req, nodeid); 
+@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + +@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; ++ struct fuse_mkdir_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ req->se->op.mkdir(req, nodeid, name, arg->mode); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.unlink) { + req->se->op.unlink(req, nodeid, name); +@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rmdir) { + req->se->op.rmdir(req, nodeid, name); +@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; +- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ const char *linkname = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name || !linkname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.symlink) { + req->se->op.symlink(req, linkname, nodeid, name); +@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); +@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; ++ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.link) { +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ req->se->op.link(req, arg->oldnodeid, nodeid, name); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; +- + if (req->se->op.create) { ++ struct fuse_create_in *arg; + struct fuse_file_info fi; +- char *name = PARAM(arg); ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + 
fi.flags = arg->flags; +@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; +- + if (req->se->op.read) { ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->lock_owner; +@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; + struct fuse_file_info fi; +- char *param; ++ const char *param; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ param = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!param) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + 
fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +- param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +-static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_flush_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; + fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; + fi.lock_owner = arg->lock_owner; ++ + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; + } + + if (req->se->op.release) { +@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + 
fi.fh = arg->fh; + +@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
+ } + } + +-static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + (void)nodeid; +- (void)inarg; ++ (void)iter; + + if (req->se->op.statfs) { + req->se->op.statfs(req, nodeid); +@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg; ++ const char *name; ++ const char *value; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ value = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!value) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.setxattr) { + req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); +@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.getxattr) { +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ req->se->op.getxattr(req, nodeid, name, arg->size); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.listxattr) { + req->se->op.listxattr(req, nodeid, arg->size); +@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.removexattr) { + req->se->op.removexattr(req, nodeid, name); +@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, + flock->l_pid = fl->pid; + } + +-static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) ++ struct fuse_mbuf_iter *iter, int sleep) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock 
flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, iter, 0); + } + +-static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, iter, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) +@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + return 0; + } + +-static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_interrupt_in *arg; + struct fuse_session *se = req->se; + + (void)nodeid; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", + (unsigned long long)arg->unique); +@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, + } + } + +-static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; ++ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ ++ if 
(!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.bmap) { + req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; ++ struct fuse_ioctl_in *arg; ++ unsigned int flags; ++ void *in_buf = NULL; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ flags = arg->flags; + if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { + fuse_reply_err(req, ENOTTY); + return; + } + ++ if (arg->in_size) { ++ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); ++ if (!in_buf) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + free(ph); + } + +-static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_poll_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.poll_events = arg->events; +@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, ++ 
struct fuse_mbuf_iter *iter) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_fallocate_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, +- const void *inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_copy_file_range_in *arg = +- (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_copy_file_range_in *arg; + struct fuse_file_info fi_in, fi_out; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi_in, 0, sizeof(fi_in)); + fi_in.fh = arg->fh_in; + +@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, + } + } + +-static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_lseek_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); ++ struct fuse_init_in *arg; + struct fuse_init_out outarg; + struct fuse_session *se = req->se; + 
size_t bufsize = se->bufsize; + size_t outargsize = sizeof(outarg); + + (void)nodeid; ++ ++ /* First consume the old fields... */ ++ arg = fuse_mbuf_iter_advance(iter, compat_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ /* ...and now consume the new fields. */ ++ if (arg->major == 7 && arg->minor >= 6) { ++ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); + if (arg->major == 7 && arg->minor >= 6) { +@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, &outarg, outargsize); + } + +-static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_session *se = req->se; + + (void)nodeid; +- (void)inarg; ++ (void)iter; + + se->got_destroy = 1; + if (se->op.destroy) { +@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); + const char *name; + } fuse_ll_ops[] = { + [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf = bufv->buf; + struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; +- const void *inarg; + struct fuse_req *req; + int err; + +@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } + } + +- inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, &iter, bufv); + } else { +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } +- + return; + + reply_err: +-- 
+1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch new file mode 100644 index 0000000..b6de0a9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch @@ -0,0 +1,82 @@ +From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:56 +0100 +Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-82-dgilbert@redhat.com> +Patchwork-id: 93533 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This cleans up unfreed resources in se on quiting, including +se->virtio_dev, se->vu_socket_path, se->vu_socketfd. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.h | 2 +- + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 65f91da..440508a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) + if (se->fd != -1) { + close(se->fd); + } ++ ++ if (se->vu_socket_path) { ++ virtio_session_close(se); ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ } ++ + free(se); + } + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7a8774a..e7bd772 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) + + return 0; + } ++ ++void virtio_session_close(struct fuse_session *se) ++{ ++ close(se->vu_socketfd); ++ free(se->virtio_dev); ++ se->virtio_dev = NULL; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index cc676b9..1116840 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -19,7 +19,7 @@ + struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); +- ++void virtio_session_close(struct fuse_session *se); + int virtio_loop(struct fuse_session *se); + + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch new file mode 100644 index 0000000..d01b000 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch @@ -0,0 +1,99 @@ +From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:13 +0100 +Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use + fuse log infra +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-99-dgilbert@redhat.com> +Patchwork-id: 93552 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_signals.c | 7 +++++-- + tools/virtiofsd/helper.c | 9 ++++++--- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index dc7c8ac..f18625b 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -12,6 +12,7 @@ + #include "fuse_i.h" + #include "fuse_lowlevel.h" + ++#include + #include + #include + #include +@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + sa.sa_flags = 0; + + if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", ++ strerror(errno)); + return -1; + } + + if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && + sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", ++ strerror(errno)); + return -1; + } + return 0; +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 33749bf..f98d8f2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) + char completed; + + if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", ++ strerror(errno)); + return -1; + } + +@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) + */ + switch (fork()) { + case -1: +- perror("fuse_daemonize: fork"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", ++ strerror(errno)); + return -1; + case 0: + break; +@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) + } + + if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", ++ strerror(errno)); + return -1; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch new file mode 100644 index 0000000..8c1022a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch @@ -0,0 +1,57 @@ +From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:12 +0100 +Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-98-dgilbert@redhat.com> +Patchwork-id: 93551 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Peng Tao + +Right now we always enable it regardless of given commandlines. +Fix it by setting the flag relying on the lo->flock bit. + +Signed-off-by: Peng Tao +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ab16135..ccbbec1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->flock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); ++ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; ++ } + } + + if (conn->capable & FUSE_CAP_POSIX_LOCKS) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch b/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch new file mode 100644 index 0000000..4f8e5ef --- /dev/null +++ b/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch @@ -0,0 +1,49 @@ +From 
901c005299b0316bbca7bc190de56f6c7a2a9880 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:11 +0000 +Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-5-dgilbert@redhat.com> +Patchwork-id: 94127 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing a NULL check if the argument fetch fails. + +Fixes: Coverity CID 1413119 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 01c418a..704c036 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_file_info fi; + + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch b/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch new file mode 100644 index 0000000..569096d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch @@ -0,0 +1,67 @@ +From 78152453940967f9ece9fe3ffc5017c669d6ec28 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 5 May 2020 16:36:00 +0100 +Subject: [PATCH 9/9] virtiofsd: drop all capabilities in the wait parent + process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200505163600.22956-8-dgilbert@redhat.com> +Patchwork-id: 96274 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 7/7] virtiofsd: drop all capabilities in the wait parent process +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Michael S. Tsirkin + +From: Stefan Hajnoczi + +All this process does is wait for its child. No capabilities are +needed. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 66502bbca37ca7a3bfa57e82cfc03b89a7a11eae) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6358874..f41a6b0 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2535,6 +2535,17 @@ static void print_capabilities(void) + } + + /* ++ * Drop all Linux capabilities because the wait parent process only needs to ++ * sit in waitpid(2) and terminate. ++ */ ++static void setup_wait_parent_capabilities(void) ++{ ++ capng_setpid(syscall(SYS_gettid)); ++ capng_clear(CAPNG_SELECT_BOTH); ++ capng_apply(CAPNG_SELECT_BOTH); ++} ++ ++/* + * Move to a new mount, net, and pid namespaces to isolate this process. 
+ */ + static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) +@@ -2567,6 +2578,8 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + pid_t waited; + int wstatus; + ++ setup_wait_parent_capabilities(); ++ + /* The parent waits for the child */ + do { + waited = waitpid(child, &wstatus, 0); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch new file mode 100644 index 0000000..3279a5e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch @@ -0,0 +1,47 @@ +From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:00 +0100 +Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-86-dgilbert@redhat.com> +Patchwork-id: 93539 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +lookup is a read-only operation, so PARALLEL_DIROPS can be enabled. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index aac282f..70568d2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + if (se->conn.want & FUSE_CAP_ASYNC_READ) { + outarg.flags |= FUSE_ASYNC_READ; + } ++ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { ++ outarg.flags |= FUSE_PARALLEL_DIROPS; ++ } + if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { + outarg.flags |= FUSE_POSIX_LOCKS; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch b/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch new file mode 100644 index 0000000..96f91a1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch @@ -0,0 +1,111 @@ +From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:50 +0100 +Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-76-dgilbert@redhat.com> +Patchwork-id: 93527 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Initialize the root inode in a single place. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +dgilbert: +with fix suggested by Misono Tomohiro +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- + 1 file changed, 25 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 33bfb4d..9e7191e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + } + ++static void setup_root(struct lo_data *lo, struct lo_inode *root) ++{ ++ int fd, res; ++ struct stat stat; ++ ++ fd = open("/", O_PATH); ++ if (fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); ++ exit(1); ++ } ++ ++ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); ++ exit(1); ++ } ++ ++ root->is_symlink = false; ++ root->fd = fd; ++ root->ino = stat.st_ino; ++ root->dev = stat.st_dev; ++ root->refcount = 2; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) + if (lo.debug) { + current_log_level = FUSE_LOG_DEBUG; + } +- lo.root.refcount = 2; +- + if (lo.source) { + struct stat stat; + int res; +@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) + } else { + lo.source = "/"; + } +- lo.root.is_symlink = false; + if (!lo.timeout_set) { + switch (lo.cache) { + case CACHE_NEVER: +@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) + exit(1); + } + +- lo.root.fd = open(lo.source, O_PATH); +- +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); +- exit(1); +- } +- + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); + if (se == NULL) { + goto err_out1; +@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) + + setup_sandbox(&lo, 
se, opts.syslog); + ++ setup_root(&lo, &lo.root); + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch new file mode 100644 index 0000000..4860bec --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch @@ -0,0 +1,85 @@ +From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:49 +0100 +Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in + lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-75-dgilbert@redhat.com> +Patchwork-id: 93529 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +The Linux file handle APIs (struct export_operations) can access inodes +that are not attached to parents because path name traversal is not +performed. Refuse if there is no parent in lo_do_lookup(). + +Also clean up lo_do_lookup() while we're here. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index de12e75..33bfb4d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct lo_inode *inode, *dir = lo_inode(req, parent); + ++ /* ++ * name_to_handle_at() and open_by_handle_at() can reach here with fuse ++ * mount point in guest, but we don't have its inode info in the ++ * ino_map. ++ */ ++ if (!dir) { ++ return ENOENT; ++ } ++ + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; +@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + name = "."; + } + +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; + } +@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode = lo_find(lo_data(req), &e->attr); ++ inode = lo_find(lo, &e->attr); + if (inode) { + close(newfd); + newfd = -1; +@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->is_symlink = S_ISLNK(e->attr.st_mode); + inode->refcount = 1; + inode->fd = newfd; ++ newfd = -1; + inode->ino = e->attr.st_ino; + inode->dev = e->attr.st_dev; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch b/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch new file mode 100644 index 0000000..a831992 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch @@ -0,0 +1,63 @@ +From 
0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:55 +0100 +Subject: [PATCH 084/116] virtiofsd: fix error handling in main() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-81-dgilbert@redhat.com> +Patchwork-id: 93534 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place +to do cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9ed77a1..af050c6 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) + lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { +- return 1; ++ goto err_out1; + } + fuse_set_log_func(log_func); + use_syslog = opts.syslog; + if (use_syslog) { + openlog("virtiofsd", LOG_PID, LOG_DAEMON); + } ++ + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +- return 1; ++ goto err_out1; + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch new file mode 100644 index 0000000..420a8a6 
--- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch @@ -0,0 +1,44 @@ +From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:01 +0100 +Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in + lo_do_lookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-87-dgilbert@redhat.com> +Patchwork-id: 93543 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eric Ren + +Signed-off-by: Eric Ren +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e8dc5c7..05b5f89 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { + goto out_err; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch new file mode 100644 index 0000000..90debb0 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch @@ -0,0 +1,322 @@ +From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:35 +0100 +Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-61-dgilbert@redhat.com> +Patchwork-id: 93515 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Some FUSE message replies contain padding fields that are not +initialized by libfuse. This is fine in traditional FUSE applications +because the kernel is trusted. virtiofsd does not trust the guest and +must not expose uninitialized memory. + +Use C struct initializers to automatically zero out memory. Not all of +these code changes are strictly necessary but they will prevent future +information leaks if the structs are extended. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- + 1 file changed, 76 insertions(+), 74 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2d6dc5a..6ceb33d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ *attr = (struct fuse_attr){ ++ .ino = stbuf->st_ino, ++ .mode = stbuf->st_mode, ++ .nlink = stbuf->st_nlink, ++ .uid = stbuf->st_uid, ++ .gid = stbuf->st_gid, ++ .rdev = stbuf->st_rdev, ++ .size = stbuf->st_size, ++ .blksize = stbuf->st_blksize, ++ .blocks = stbuf->st_blocks, ++ .atime = stbuf->st_atime, ++ .mtime = stbuf->st_mtime, ++ .ctime = stbuf->st_ctime, ++ .atimensec = ST_ATIM_NSEC(stbuf), ++ .mtimensec = ST_MTIM_NSEC(stbuf), ++ .ctimensec = ST_CTIM_NSEC(stbuf), ++ }; + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) +@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = 
req->unique, ++ .error = error, ++ }; + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); + error = -ERANGE; + } + +- out.unique = req->unique; +- out.error = error; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + static void convert_statfs(const struct statvfs *stbuf, + struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ *kstatfs = (struct fuse_kstatfs){ ++ .bsize = stbuf->f_bsize, ++ .frsize = stbuf->f_frsize, ++ .blocks = stbuf->f_blocks, ++ .bfree = stbuf->f_bfree, ++ .bavail = stbuf->f_bavail, ++ .files = stbuf->f_files, ++ .ffree = stbuf->f_ffree, ++ .namelen = stbuf->f_namemax, ++ }; + } + + static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) +@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) + static void fill_entry(struct fuse_entry_out *arg, + const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ *arg = (struct fuse_entry_out){ ++ .nodeid = e->ino, ++ .generation = e->generation, ++ .entry_valid = calc_timeout_sec(e->entry_timeout), ++ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), ++ .attr_valid = calc_timeout_sec(e->attr_timeout), ++ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), ++ }; + convert_stat(&e->attr, &arg->attr); + } + +@@ -362,10 +368,12 @@ size_t 
fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + fill_entry(&dp->entry_out, e); + + struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ *dirent = (struct fuse_dirent){ ++ .ino = e->attr.st_ino, ++ .off = off, ++ .namelen = namelen, ++ .type = (e->attr.st_mode & S_IFMT) >> 12, ++ }; + memcpy(dirent->name, name, namelen); + memset(dirent->name + namelen, 0, entlen_padded - entlen); + +@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = req->unique, ++ }; + int res; + + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; +- + res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); +@@ -2145,14 +2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .error = notify_code, ++ }; + + if (!se->got_init) { + return -ENOTCONN; + } + +- out.unique = 0; +- out.error = notify_code; + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { + if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; ++ struct fuse_notify_poll_wakeup_out outarg = { ++ .kh = ph->kh, ++ }; + struct iovec iov[2]; + +- outarg.kh = ph->kh; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2179,17 +2186,17 @@ int 
fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; ++ struct fuse_notify_inval_inode_out outarg = { ++ .ino = ino, ++ .off = off, ++ .len = len, ++ }; + struct iovec iov[2]; + + if (!se) { + return -EINVAL; + } + +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out outarg; ++ struct fuse_notify_inval_entry_out outarg = { ++ .parent = parent, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void *)name; +@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + fuse_ino_t child, const char *name, + size_t namelen) + { +- struct fuse_notify_delete_out outarg; ++ struct fuse_notify_delete_out outarg = { ++ .parent = parent, ++ .child = child, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void *)name; +@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; ++ struct 
fuse_out_header out = { ++ .error = FUSE_NOTIFY_STORE, ++ }; ++ struct fuse_notify_store_out outarg = { ++ .nodeid = ino, ++ .offset = offset, ++ .size = fuse_buf_size(bufv), ++ }; + struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); + int res; + + if (!se) { + return -EINVAL; + } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; +- +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(out); + iov[1].iov_base = &outarg; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch new file mode 100644 index 0000000..6243037 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch @@ -0,0 +1,94 @@ +From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:21 +0100 +Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-107-dgilbert@redhat.com> +Patchwork-id: 93560 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Now that lo_destroy() is serialized we can call unref_inode() so that +all inode resources are freed. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 21 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79b8b71..eb001b9 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + +-static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) +-{ +- struct lo_inode *inode = value; +- struct lo_data *lo = user_data; +- +- inode->nlookup = 0; +- lo_map_remove(&lo->ino_map, inode->fuse_ino); +- close(inode->fd); +- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ +- +- return TRUE; +-} +- +-static void unref_all_inodes(struct lo_data *lo) +-{ +- pthread_mutex_lock(&lo->mutex); +- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); +- pthread_mutex_unlock(&lo->mutex); +-} +- + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; +- unref_all_inodes(lo); ++ ++ /* ++ * Normally lo->mutex must be taken when traversing lo->inodes but ++ * lo_destroy() is a serialized request so no races are possible here. ++ * ++ * In addition, we cannot acquire lo->mutex since unref_inode() takes it ++ * too and this would result in a recursive lock. 
++ */ ++ while (true) { ++ GHashTableIter iter; ++ gpointer key, value; ++ ++ g_hash_table_iter_init(&iter, lo->inodes); ++ if (!g_hash_table_iter_next(&iter, &key, &value)) { ++ break; ++ } ++ ++ struct lo_inode *inode = value; ++ unref_inode_lolocked(lo, inode, inode->nlookup); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch new file mode 100644 index 0000000..4d7d6dc --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch @@ -0,0 +1,66 @@ +From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:57 +0100 +Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-83-dgilbert@redhat.com> +Patchwork-id: 93536 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +valgrind reported that lo.source is leaked on quiting, but it was defined +as (const char*) as it may point to a const string "/". + +Signed-off-by: Liu Bo +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index af050c6..056ebe8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -115,7 +115,7 @@ struct lo_data { + int writeback; + int flock; + int xattr; +- const char *source; ++ char *source; + double timeout; + int cache; + int timeout_set; +@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) + fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); + exit(1); + } +- + } else { +- lo.source = "/"; ++ lo.source = strdup("/"); + } + if (!lo.timeout_set) { + switch (lo.cache) { +@@ -2570,5 +2569,7 @@ err_out1: + close(lo.root.fd); + } + ++ free(lo.source); ++ + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch new file mode 100644 index 0000000..b17d93c --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch @@ -0,0 +1,56 @@ +From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:09 +0000 +Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket + leak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-3-dgilbert@redhat.com> +Patchwork-id: 94124 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. 
David Alan Gilbert" + +If we fail when bringing up the socket we can leak the listen_fd; +in practice the daemon will exit so it's not really a problem. + +Fixes: Coverity CID 1413121 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 80a6e92..dd1c605 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + old_umask = umask(0077); + if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ close(listen_sock); + umask(old_umask); + return -1; + } +@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + + if (listen(listen_sock, 1) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ close(listen_sock); + return -1; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch b/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch new file mode 100644 index 0000000..fcb5ca2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch @@ -0,0 +1,66 @@ +From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:59 +0100 +Subject: [PATCH 028/116] virtiofsd: get/set features callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-25-dgilbert@redhat.com> +Patchwork-id: 93478 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add the get/set features callbacks. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 1928a20..4819e56 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -46,6 +46,17 @@ struct virtio_fs_config { + uint32_t num_queues; + }; + ++/* Callback from libvhost-user */ ++static uint64_t fv_get_features(VuDev *dev) ++{ ++ return 1ULL << VIRTIO_F_VERSION_1; ++} ++ ++/* Callback from libvhost-user */ ++static void fv_set_features(VuDev *dev, uint64_t features) ++{ ++} ++ + /* + * Callback from libvhost-user if there's a new fd we're supposed to listen + * to, typically a queue kick? 
+@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) + } + + static const VuDevIface fv_iface = { +- /* TODO: Add other callbacks */ ++ .get_features = fv_get_features, ++ .set_features = fv_set_features, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch new file mode 100644 index 0000000..68d20e7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch @@ -0,0 +1,589 @@ +From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:11 +0100 +Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent + use-after-free +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-97-dgilbert@redhat.com> +Patchwork-id: 93550 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If thread A is using an inode it must not be deleted by thread B when +processing a FUSE_FORGET request. + +The FUSE protocol itself already has a counter called nlookup that is +used in FUSE_FORGET messages. We cannot trust this counter since the +untrusted client can manipulate it via FUSE_FORGET messages. + +Introduce a new refcount to keep inodes alive for the required lifespan. +lo_inode_put() must be called to release a reference. FUSE's nlookup +counter holds exactly one reference so that the inode stays alive as +long as the client still wants to remember it. 
+ +Note that the lo_inode->is_symlink field is moved to avoid creating a +hole in the struct due to struct field alignment. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ + 1 file changed, 146 insertions(+), 23 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3a6d6b..ab16135 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -97,7 +97,13 @@ struct lo_key { + + struct lo_inode { + int fd; +- bool is_symlink; ++ ++ /* ++ * Atomic reference count for this object. The nlookup field holds a ++ * reference and release it when nlookup reaches 0. ++ */ ++ gint refcount; ++ + struct lo_key key; + + /* +@@ -116,6 +122,8 @@ struct lo_inode { + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ ++ ++ bool is_symlink; + }; + + struct lo_cred { +@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + return elem - lo_data(req)->ino_map.elems; + } + ++static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) ++{ ++ struct lo_inode *inode = *inodep; ++ ++ if (!inode) { ++ return; ++ } ++ ++ *inodep = NULL; ++ ++ if (g_atomic_int_dec_and_test(&inode->refcount)) { ++ close(inode->fd); ++ free(inode); ++ } ++} ++ ++/* Caller must release refcount using lo_inode_put() */ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + struct lo_data *lo = lo_data(req); +@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->ino_map, ino); ++ if (elem) { ++ g_atomic_int_inc(&elem->inode->refcount); ++ } 
+ pthread_mutex_unlock(&lo->mutex); + + if (!elem) { +@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + return elem->inode; + } + ++/* ++ * TODO Remove this helper and force callers to hold an inode refcount until ++ * they are done with the fd. This will be done in a later patch to make ++ * review easier. ++ */ + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { + struct lo_inode *inode = lo_inode(req, ino); +- return inode ? inode->fd : -1; ++ int fd; ++ ++ if (!inode) { ++ return -1; ++ } ++ ++ fd = inode->fd; ++ lo_inode_put(lo_data(req), &inode); ++ return fd; + } + + static void lo_init(void *userdata, struct fuse_conn_info *conn) +@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + ++/* ++ * Increments parent->nlookup and caller must release refcount using ++ * lo_inode_put(&parent). ++ */ + static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + char path[PATH_MAX], struct lo_inode **parent) + { +@@ -582,6 +627,7 @@ retry: + p = &lo->root; + pthread_mutex_lock(&lo->mutex); + p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -625,6 +671,7 @@ retry: + + fail_unref: + unref_inode_lolocked(lo, p, 1); ++ lo_inode_put(lo, &p); + fail: + if (retries) { + retries--; +@@ -663,6 +710,7 @@ fallback: + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, &parent); + } + + return res; +@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + goto out_err; + } + } ++ lo_inode_put(lo, &inode); + + return lo_getattr(req, ino, fi); + + out_err: + saverr = errno; ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + if (p) { + assert(p->nlookup > 0); 
+ p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + } + pthread_mutex_unlock(&lo->mutex); + +@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) + free(plock); + } + ++/* ++ * Increments nlookup and caller must release refcount using ++ * lo_inode_put(&parent). ++ */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode, *dir = lo_inode(req, parent); ++ struct lo_inode *inode = NULL; ++ struct lo_inode *dir = lo_inode(req, parent); + + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse +@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ ++ /* ++ * One for the caller and one for nlookup (released in ++ * unref_inode_lolocked()) ++ */ ++ g_atomic_int_set(&inode->refcount, 2); ++ + inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; +@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, + name, (unsigned long long)e->ino); +@@ -894,6 +959,8 @@ out_err: + if (newfd != -1) { + close(newfd); + } ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + return saverr; + } + +@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; +@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + 
fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &dir); + return; + + out: ++ lo_inode_put(lo, &dir); + fuse_reply_err(req, saverr); + } + +@@ -1085,6 +1155,7 @@ fallback: + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, &parent); + } + + return res; +@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *parent_inode; + struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; +@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + return; + } + ++ parent_inode = lo_inode(req, parent); + inode = lo_inode(req, ino); +- if (!inode) { +- fuse_reply_err(req, EBADF); +- return; ++ if (!parent_inode || !inode) { ++ errno = EBADF; ++ goto out_err; + } + + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); + if (res == -1) { + goto out_err; + } +@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + ++/* Increments nlookup and caller must release refcount using lo_inode_put() */ + static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, + const char *name) + { +@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? 
errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; +- struct lo_inode *oldinode; +- struct lo_inode *newinode; ++ struct lo_inode *parent_inode; ++ struct lo_inode *newparent_inode; ++ struct lo_inode *oldinode = NULL; ++ struct lo_inode *newinode = NULL; + struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { +@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ newparent_inode = lo_inode(req, newparent); ++ if (!parent_inode || !newparent_inode) { ++ fuse_reply_err(req, EBADF); ++ goto out; ++ } ++ + oldinode = lookup_name(req, parent, name); + newinode = lookup_name(req, newparent, newname); + +@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); + #else +- res = syscall(SYS_renameat2, lo_fd(req, parent), name, +- lo_fd(req, newparent), newname, flags); ++ res = syscall(SYS_renameat2, parent_inode->fd, name, ++ newparent_inode->fd, newname, flags); + if (res == -1 && errno == ENOSYS) { + fuse_reply_err(req, EINVAL); + } else { +@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); ++ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + out: + unref_inode_lolocked(lo, oldinode, 1); + unref_inode_lolocked(lo, newinode, 1); ++ lo_inode_put(lo, &oldinode); ++ lo_inode_put(lo, &newinode); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &newparent_inode); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); ++ ++ /* Drop our refcount from lo_do_lookup() */ ++ lo_inode_put(lo, &inode); + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); ++ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ + + return TRUE; + } +@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); ++ lo_inode_put(lo, &inode); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + err = 0; + error: + lo_dirp_put(&d); ++ lo_inode_put(lo, &dinode); + + /* + * If there's an error, we can only signal it if we haven't stored +@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int fd; + struct lo_data *lo = lo_data(req); ++ struct lo_inode 
*parent_inode; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ if (!parent_inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; + } + +- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; + lo_restore_cred(&old); +@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +- fuse_reply_err(req, ENOMEM); +- return; ++ err = ENOMEM; ++ goto out; + } + + fi->fh = fh; +@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + out: ++ lo_inode_put(lo, &parent_inode); ++ + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + plock = + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + ret = fcntl(plock->fd, F_OFD_GETLK, lock); + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); + + if (saverr) { + fuse_reply_err(req, saverr); +@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + /* TODO: Is it alright to modify flock? 
*/ +@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); ++ + fuse_reply_err(req, saverr); + } + +@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + pthread_mutex_unlock(&inode->plock_mutex); + + res = close(dup(lo_fi_fd(req, fi))); ++ lo_inode_put(lo_data(req), &inode); + fuse_reply_err(req, res == -1 ? errno : 0); + } + +@@ -2115,11 +2227,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2190,11 +2305,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2243,6 +2361,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2289,6 +2409,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; + root->nlookup = 2; ++ g_atomic_int_set(&root->refcount, 2); + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch b/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch new file mode 100644 index 0000000..df69242 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch @@ -0,0 +1,85 @@ +From 852a0a22d674b0594aecf0912a0885d197f34978 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:57 +0100 +Subject: [PATCH 6/9] virtiofsd: jail lo->proc_self_fd + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200505163600.22956-5-dgilbert@redhat.com> +Patchwork-id: 96275 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 4/7] virtiofsd: jail lo->proc_self_fd +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Michael S. Tsirkin + +From: Miklos Szeredi + +While it's not possible to escape the proc filesystem through +lo->proc_self_fd, it is possible to escape to the root of the proc +filesystem itself through "../..". + +Use a temporary mount for opening lo->proc_self_fd, that has its root at +/proc/self/fd/, preventing access to the ancestor directories. + +Signed-off-by: Miklos Szeredi +Message-Id: <20200429124733.22488-1-mszeredi@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 397ae982f4df46e7d4b2625c431062c9146f3b83) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 184ad0f..73d8405 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2540,6 +2540,8 @@ static void print_capabilities(void) + static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + { + pid_t child; ++ char template[] = "virtiofsd-XXXXXX"; ++ char *tmpdir; + + /* + * Create a new pid namespace for *child* processes. 
We'll have to +@@ -2601,12 +2603,33 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + exit(1); + } + ++ tmpdir = mkdtemp(template); ++ if (!tmpdir) { ++ fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template); ++ exit(1); ++ } ++ ++ if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n", ++ tmpdir); ++ exit(1); ++ } ++ + /* Now we can get our /proc/self/fd directory file descriptor */ +- lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ lo->proc_self_fd = open(tmpdir, O_PATH); + if (lo->proc_self_fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); ++ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir); + exit(1); + } ++ ++ if (umount2(tmpdir, MNT_DETACH) < 0) { ++ fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir); ++ exit(1); ++ } ++ ++ if (rmdir(tmpdir) < 0) { ++ fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir); ++ } + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch b/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch new file mode 100644 index 0000000..bc04f6b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch @@ -0,0 +1,46 @@ +From ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:10 +0000 +Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-4-dgilbert@redhat.com> +Patchwork-id: 94126 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing unlock in error path. + +Fixes: Covertiy CID 1413123 +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e6f2399..c635fc8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -232,6 +232,7 @@ static int load_capng(void) + */ + cap.saved = capng_save_state(); + if (!cap.saved) { ++ pthread_mutex_unlock(&cap.mutex); + fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch b/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch new file mode 100644 index 0000000..d6cb0e3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch @@ -0,0 +1,76 @@ +From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:08 +0100 +Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-34-dgilbert@redhat.com> +Patchwork-id: 93489 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +According to vhost-user.rst "Backend program conventions", backend +programs should run in the foreground by default. Follow the +conventions so libvirt and other management tools can control virtiofsd +in a standard way. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 676032e..a3645fc 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -29,6 +29,11 @@ + { \ + t, offsetof(struct fuse_cmdline_opts, p), 1 \ + } ++#define FUSE_HELPER_OPT_VALUE(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), v \ ++ } ++ + + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), +@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("subtype=", nodefault_subtype), +@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) + " -V --version print version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" ++ " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n"); +@@ -158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + + opts->max_idle_threads = 10; ++ opts->foreground = 1; + + if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == + -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch b/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch new file mode 100644 index 0000000..6d88549 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch @@ -0,0 +1,62 @@ +From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:06 +0100 +Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-92-dgilbert@redhat.com> +Patchwork-id: 93545 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Hold the lock across both lo_map_get() and lo_map_remove() to prevent +races between two FUSE_RELEASE requests. In this case I don't see a +serious bug but it's safer to do things atomically. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9414935..690edbc 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +- int fd; ++ struct lo_map_elem *elem; ++ int fd = -1; + + (void)ino; + +- fd = lo_fi_fd(req, fi); +- + pthread_mutex_lock(&lo->mutex); +- lo_map_remove(&lo->fd_map, fi->fh); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ if (elem) { ++ fd = elem->fd; ++ elem = NULL; ++ lo_map_remove(&lo->fd_map, fi->fh); ++ } + pthread_mutex_unlock(&lo->mutex); + + close(fd); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch new file mode 100644 index 0000000..9a33d1b --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch @@ -0,0 +1,223 @@ +From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:29 +0100 +Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-55-dgilbert@redhat.com> +Patchwork-id: 93510 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd needs access to /proc/self/fd. Let's move to a new pid +namespace so that a compromised process cannot see other +processes running on the system. + +One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* +processes and not the current process. Therefore we need to fork the +pid 1 process that will actually run virtiofsd and leave a parent in +waitpid(2). This is not the same thing as daemonization and parent +processes should not notice a difference. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr.
David Alan Gilbert +(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- + 1 file changed, 86 insertions(+), 48 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 27ab328..0947d14 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -51,7 +51,10 @@ + #include + #include + #include ++#include + #include ++#include ++#include + #include + #include + +@@ -1945,24 +1948,95 @@ static void print_capabilities(void) + } + + /* +- * Called after our UNIX domain sockets have been created, now we can move to +- * an empty network namespace to prevent TCP/IP and other network activity in +- * case this process is compromised. ++ * Move to a new mount, net, and pid namespaces to isolate this process. + */ +-static void setup_net_namespace(void) ++static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + { +- if (unshare(CLONE_NEWNET) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ pid_t child; ++ ++ /* ++ * Create a new pid namespace for *child* processes. We'll have to ++ * fork in order to enter the new pid namespace. A new mount namespace ++ * is also needed so that we can remount /proc for the new pid ++ * namespace. ++ * ++ * Our UNIX domain sockets have been created. Now we can move to ++ * an empty network namespace to prevent TCP/IP and other network ++ * activity in case this process is compromised. 
++ */ ++ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ child = fork(); ++ if (child < 0) { ++ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); ++ exit(1); ++ } ++ if (child > 0) { ++ pid_t waited; ++ int wstatus; ++ ++ /* The parent waits for the child */ ++ do { ++ waited = waitpid(child, &wstatus, 0); ++ } while (waited < 0 && errno == EINTR && !se->exited); ++ ++ /* We were terminated by a signal, see fuse_signals.c */ ++ if (se->exited) { ++ exit(0); ++ } ++ ++ if (WIFEXITED(wstatus)) { ++ exit(WEXITSTATUS(wstatus)); ++ } ++ ++ exit(1); ++ } ++ ++ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ ++ prctl(PR_SET_PDEATHSIG, SIGTERM); ++ ++ /* ++ * If the mounts have shared propagation then we want to opt out so our ++ * mount changes don't affect the parent mount namespace. ++ */ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); ++ exit(1); ++ } ++ ++ /* The child must remount /proc to use the new pid namespace */ ++ if (mount("proc", "/proc", "proc", ++ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); ++ exit(1); ++ } ++ ++ /* Now we can get our /proc/self/fd directory file descriptor */ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); + exit(1); + } + } + +-/* This magic is based on lxc's lxc_pivot_root() */ +-static void setup_pivot_root(const char *source) ++/* ++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
++ */ ++static void setup_mounts(const char *source) + { + int oldroot; + int newroot; + ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ /* This magic is based on lxc's lxc_pivot_root() */ + oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + if (oldroot < 0) { + fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); +@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) + close(oldroot); + } + +-static void setup_proc_self_fd(struct lo_data *lo) +-{ +- lo->proc_self_fd = open("/proc/self/fd", O_PATH); +- if (lo->proc_self_fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); +- exit(1); +- } +-} +- +-/* +- * Make the source directory our root so symlinks cannot escape and no other +- * files are accessible. +- */ +-static void setup_mount_namespace(const char *source) +-{ +- if (unshare(CLONE_NEWNS) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); +- exit(1); +- } +- +- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); +- exit(1); +- } +- +- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); +- exit(1); +- } +- +- setup_pivot_root(source); +-} +- + /* + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + { +- setup_net_namespace(); +- setup_mount_namespace(lo->source); ++ setup_namespaces(lo, se); ++ setup_mounts(lo->source); + } + + int main(int argc, char *argv[]) +@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + +- /* Must be after daemonize to get the right /proc/self/fd */ +- setup_proc_self_fd(&lo); +- +- setup_sandbox(&lo); ++ setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch new file mode 100644 index 0000000..69a7c20 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch @@ -0,0 +1,66 @@ +From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:28 +0100 +Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-54-dgilbert@redhat.com> +Patchwork-id: 93508 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If the process is compromised there should be no network access. Use an +empty network namespace to sandbox networking. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0570453..27ab328 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1944,6 +1944,19 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* ++ * Called after our UNIX domain sockets have been created, now we can move to ++ * an empty network namespace to prevent TCP/IP and other network activity in ++ * case this process is compromised. ++ */ ++static void setup_net_namespace(void) ++{ ++ if (unshare(CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ exit(1); ++ } ++} ++ + /* This magic is based on lxc's lxc_pivot_root() */ + static void setup_pivot_root(const char *source) + { +@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) + */ + static void setup_sandbox(struct lo_data *lo) + { ++ setup_net_namespace(); + setup_mount_namespace(lo->source); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch b/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch new file mode 100644 index 0000000..15c8cd8 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch @@ -0,0 +1,112 @@ +From 8727e4904e7a6588e39f231d837f4527f265e47e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:59 +0100 +Subject: [PATCH 8/9] virtiofsd: only retain file system capabilities + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200505163600.22956-7-dgilbert@redhat.com> +Patchwork-id: 96272 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 6/7] virtiofsd: only retain file system capabilities +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Michael S. Tsirkin + +From: Stefan Hajnoczi + +virtiofsd runs as root but only needs a subset of root's Linux +capabilities(7). As a file server its purpose is to create and access +files on behalf of a client. It needs to be able to access files with +arbitrary uid/gid owners. It also needs to be able to create device nodes. + +Introduce a Linux capabilities(7) whitelist and drop all capabilities +that we don't need, making the virtiofsd process less powerful than a +regular uid root process. + + # cat /proc/PID/status + ... + Before After + CapInh: 0000000000000000 0000000000000000 + CapPrm: 0000003fffffffff 00000000880000df + CapEff: 0000003fffffffff 00000000880000df + CapBnd: 0000003fffffffff 0000000000000000 + CapAmb: 0000000000000000 0000000000000000 + +Note that file capabilities cannot be used to achieve the same effect on +the virtiofsd executable because mount is used during sandbox setup. +Therefore we drop capabilities programmatically at the right point +during startup. + +This patch only affects the sandboxed child process. The parent process +that sits in waitpid(2) still has full root capabilities and will be +addressed in the next patch. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20200416164907.244868-2-stefanha@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a59feb483b8fae24d043569ccfcc97ea23d54a02) +Signed-off-by: Danilo C. L.
de Paula +--- + tools/virtiofsd/passthrough_ll.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 614ba55..6358874 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2723,6 +2723,43 @@ static void setup_mounts(const char *source) + } + + /* ++ * Only keep whitelisted capabilities that are needed for file system operation ++ */ ++static void setup_capabilities(void) ++{ ++ pthread_mutex_lock(&cap.mutex); ++ capng_restore_state(&cap.saved); ++ ++ /* ++ * Whitelist file system-related capabilities that are needed for a file ++ * server to act like root. Drop everything else like networking and ++ * sysadmin capabilities. ++ * ++ * Exclusions: ++ * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl ++ * and we don't support that. ++ * 2. CAP_MAC_OVERRIDE is not included because it only seems to be ++ * used by the Smack LSM. Omit it until there is demand for it. ++ */ ++ capng_setpid(syscall(SYS_gettid)); ++ capng_clear(CAPNG_SELECT_BOTH); ++ capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, ++ CAP_CHOWN, ++ CAP_DAC_OVERRIDE, ++ CAP_DAC_READ_SEARCH, ++ CAP_FOWNER, ++ CAP_FSETID, ++ CAP_SETGID, ++ CAP_SETUID, ++ CAP_MKNOD, ++ CAP_SETFCAP); ++ capng_apply(CAPNG_SELECT_BOTH); ++ ++ cap.saved = capng_save_state(); ++ pthread_mutex_unlock(&cap.mutex); ++} ++ ++/* + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +@@ -2732,6 +2769,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, + setup_namespaces(lo, se); + setup_mounts(lo->source); + setup_seccomp(enable_syslog); ++ setup_capabilities(); + } + + /* Set the maximum number of open file descriptors */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch new file mode 100644 index 0000000..e3d5773 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch @@ -0,0 +1,54 @@ +From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:24 +0100 +Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to + fuse_reply_err() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-110-dgilbert@redhat.com> +Patchwork-id: 93559 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() +changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches +the wrong errno.(i.e. reports "fuse: bad error value: ..."). + +Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno +directly in lo_copy_file_range(). + +Signed-off-by: Xiao Yang +Reviewed-by: Eryu Guan + +dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc15d61..e6f2399 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + + res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { +- fuse_reply_err(req, -errno); ++ fuse_reply_err(req, errno); + } else { + fuse_reply_write(req, res); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch new file mode 100644 index 0000000..ddacdbe --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch @@ -0,0 +1,48 @@ +From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:16 +0100 +Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for + directory open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-102-dgilbert@redhat.com> +Patchwork-id: 93555 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as +described in fuse_common.h, use cache_readdir(FOPNE_CACHE_DIR) for +diretory open when cache=always mode. + +Signed-off-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 4c61ac5..79b8b71 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + + fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ fi->cache_readdir = 1; + } + fuse_reply_open(req, fi); + return; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch new file mode 100644 index 0000000..0506574 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch @@ -0,0 +1,238 @@ +From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:15 +0100 +Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide + lo_dirp pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-41-dgilbert@redhat.com> +Patchwork-id: 93495 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_dirp pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- + 1 file changed, 76 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index a3ebf74..5f5a72f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -56,27 +56,10 @@ + + #include "passthrough_helpers.h" + +-/* +- * We are re-using pointers to our `struct lo_inode` +- * elements as inodes. This means that we must be able to +- * store uintptr_t values in a fuse_ino_t variable. The following +- * incantation checks this condition at compile time. +- */ +-#if defined(__GNUC__) && \ +- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ +- !defined __cplusplus +-_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); +-#else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { +- unsigned _uintptr_to_must_hold_fuse_ino_t +- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); +-}; +-#endif +- + struct lo_map_elem { + union { + struct lo_inode *inode; ++ struct lo_dirp *dirp; + ssize_t freelist; + }; + bool in_use; +@@ -123,6 +106,7 @@ struct lo_data { + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ ++ struct lo_map dirp_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->dirp = dirp; ++ return elem - lo_data(req)->dirp_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + { + struct lo_map_elem *elem; +@@ -861,9 +859,19 @@ struct lo_dirp { + off_t offset; + }; + +-static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { +- return (struct lo_dirp *)(uintptr_t)fi->fh; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ if (!elem) { ++ return NULL; ++ } ++ ++ return elem->dirp; + } + + static void lo_opendir(fuse_req_t req, fuse_ino_t ino, +@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + struct lo_data *lo = lo_data(req); + struct lo_dirp *d; + int fd; ++ ssize_t fh; + + d = calloc(1, sizeof(struct lo_dirp)); + if (d == NULL) { +@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + +- fi->fh = (uintptr_t)d; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_dirp_mapping(req, d); ++ pthread_mutex_unlock(&lo->mutex); ++ 
if (fh == -1) { ++ goto out_err; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } +@@ -903,6 +919,9 @@ out_errno: + error = errno; + out_err: + if (d) { ++ if (d->dp) { ++ closedir(d->dp); ++ } + if (fd != -1) { + close(fd); + } +@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; ++ struct lo_dirp *d; ++ char *buf = NULL; + char *p; + size_t rem = size; +- int err; ++ int err = ENOMEM; + + (void)ino; + ++ d = lo_dirp(req, fi); ++ if (!d) { ++ goto error; ++ } ++ + buf = calloc(1, size); + if (!buf) { +- err = ENOMEM; + goto error; + } + p = buf; +@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, + static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ + closedir(d->dp); + free(d); + fuse_reply_err(req, 0); +@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- int fd = dirfd(lo_dirp(fi)->dp); ++ struct lo_dirp *d; ++ int fd; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ fd = dirfd(d->dp); + if (datasync) { + res = fdatasync(fd); + } else { +@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) + root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); + root_elem->inode = &lo.root; + ++ lo_map_init(&lo.dirp_map); ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1710,6 
+1758,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch new file mode 100644 index 0000000..b8de3d8 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch @@ -0,0 +1,303 @@ +From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:17 +0100 +Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-43-dgilbert@redhat.com> +Patchwork-id: 93496 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +We have two operations that cannot be done race-free on a symlink in +certain cases: utimes and link. + +Add racy fallback for these if the race-free method doesn't work. We do +our best to avoid races even in this case: + + - get absolute path by reading /proc/self/fd/NN symlink + + - lookup parent directory: after this we are safe against renames in + ancestors + + - lookup name in parent directory, and verify that we got to the original + inode, if not retry the whole thing + +Both utimes(2) and link(2) hold i_lock on the inode across the operation, +so a racing rename/delete by this fuse instance is not possible, only from +other entities changing the filesystem. + +If the "norace" option is given, then disable the racy fallbacks. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 +- + tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- + 2 files changed, 145 insertions(+), 17 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index b8ec5ac..5531425 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" +- " allowed (default: 10)\n"); ++ " allowed (default: 10)\n" ++ " -o norace disable racy fallback\n" ++ " default: false\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9815bfa..ac380ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -98,6 +98,7 @@ enum { + struct lo_data { + pthread_mutex_t mutex; + int debug; ++ int norace; + int writeback; + int flock; + int xattr; +@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { + { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- ++ { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; + ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); ++ ++ + static struct lo_data *lo_data(fuse_req_t req) + { + return (struct lo_data *)fuse_req_userdata(req); +@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + +-static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec 
*tv) ++static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, ++ char path[PATH_MAX], struct lo_inode **parent) + { +- int res; + char procname[64]; ++ char *last; ++ struct stat stat; ++ struct lo_inode *p; ++ int retries = 2; ++ int res; ++ ++retry: ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ res = readlink(procname, path, PATH_MAX); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); ++ goto fail_noretry; ++ } ++ ++ if (res >= PATH_MAX) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); ++ goto fail_noretry; ++ } ++ path[res] = '\0'; ++ ++ last = strrchr(path, '/'); ++ if (last == NULL) { ++ /* Shouldn't happen */ ++ fuse_log( ++ FUSE_LOG_WARNING, ++ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); ++ goto fail_noretry; ++ } ++ if (last == path) { ++ p = &lo->root; ++ pthread_mutex_lock(&lo->mutex); ++ p->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ } else { ++ *last = '\0'; ++ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat parent: %m\n", __func__); ++ } ++ goto fail; ++ } ++ p = lo_find(lo, &stat); ++ if (p == NULL) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to find parent\n", __func__); ++ } ++ goto fail; ++ } ++ } ++ last++; ++ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to match last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ *parent = p; ++ memmove(path, last, strlen(last) + 1); ++ ++ return 0; ++ ++fail_unref: ++ unref_inode(lo, p, 1); ++fail: ++ if (retries) { ++ retries--; ++ goto retry; ++ } ++fail_noretry: ++ errno = EIO; ++ return -1; ++} ++ ++static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); + +- return utimensat(AT_FDCWD, procname, tv, 0); ++ return utimensat(AT_FDCWD, path, tv, 0); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); ++ unref_inode(lo, parent, 1); ++ } ++ ++ return res; + } + + static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) +@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + int ifd; + int res; +@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = futimens(fd, tv); + } else { +- res = utimensat_empty_nofollow(inode, tv); ++ res = utimensat_empty(lo, inode, tv); + } + if (res == -1) { + goto out_err; +@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, + lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + +-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, ++ int dfd, const char *name) + { + int res; +- char procname[64]; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = linkat(parent->fd, path, dfd, name, 0); ++ unref_inode(lo, parent, 1); ++ } + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return res; + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); + if (res == -1) { + goto out_err; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch new file mode 100644 index 0000000..24b2a6e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch @@ -0,0 +1,328 @@ +From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:16 +0100 +Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file + descriptors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-42-dgilbert@redhat.com> +Patchwork-id: 93494 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose file descriptor numbers to clients. This prevents the +abuse of internal file descriptors (like stdin/stdout). + +Signed-off-by: Stefan Hajnoczi +Fix from: +Signed-off-by: Xiao Yang +dgilbert: + Added lseek +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- + 1 file changed, 94 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5f5a72f..9815bfa 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -60,6 +60,7 @@ struct lo_map_elem { + union { + struct lo_inode *inode; + struct lo_dirp *dirp; ++ int fd; + ssize_t freelist; + }; + bool in_use; +@@ -107,6 +108,7 @@ struct lo_data { + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ ++ struct lo_map fd_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->fd = fd; ++ return elem - lo_data(req)->fd_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) + { + struct lo_map_elem *elem; +@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, + return utimensat(AT_FDCWD, procname, tv, 0); + } + ++static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return -1; ++ } ++ ++ return elem->fd; ++} ++ + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int valid, struct fuse_file_info *fi) + { +@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + struct lo_inode *inode; + int ifd; + int res; ++ int fd; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + + ifd = inode->fd; + ++ /* If fi->fh is invalid we'll report EBADF later */ ++ if (fi) { ++ fd = lo_fi_fd(req, fi); ++ } ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { +- res = fchmod(fi->fh, attr->st_mode); ++ res = fchmod(fd, attr->st_mode); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = chmod(procname, attr->st_mode); +@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + if (valid & FUSE_SET_ATTR_SIZE) { + if (fi) { +- res = ftruncate(fi->fh, attr->st_size); ++ res = ftruncate(fd, attr->st_size); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = truncate(procname, attr->st_size); +@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + + if (fi) { +- res = futimens(fi->fh, tv); ++ res = futimens(fd, tv); + } else { + res = utimensat_empty_nofollow(inode, tv); + } +@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + lo_restore_cred(&old); + + 
if (!err) { +- fi->fh = fd; ++ ssize_t fh; ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } + if (lo->cache == CACHE_NEVER) { +@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int fd; ++ ssize_t fh; + char buf[64]; + struct lo_data *lo = lo_data(req); + +@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return (void)fuse_reply_err(req, errno); + } + +- fi->fh = fd; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { +@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { ++ struct lo_data *lo = lo_data(req); ++ int fd; ++ + (void)ino; + +- close(fi->fh); ++ fd = lo_fi_fd(req, fi); ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ close(fd); + fuse_reply_err(req, 0); + } + +@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; +- res = close(dup(fi->fh)); ++ res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + return (void)fuse_reply_err(req, errno); + } + } else { +- fd = fi->fh; ++ fd = lo_fi_fd(req, fi); + } + + if (datasync) { +@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + } + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; ++ buf.buf[0].fd = lo_fi_fd(req, fi); + buf.buf[0].pos = offset; + + fuse_reply_data(req, &buf); +@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + + if (lo_debug(req)) { +@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + (void)ino; + + #ifdef CONFIG_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); ++ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); + if (err < 0) { + err = errno; + } +@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + return; + } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); + #endif + + fuse_reply_err(req, err); +@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + int res; + (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(lo_fi_fd(req, fi), op); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + off_t off_out, struct fuse_file_info *fi_out, + size_t len, int flags) + { ++ int in_fd, out_fd; + ssize_t res; + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, +- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, +- flags); ++ in_fd = lo_fi_fd(req, fi_in); ++ out_fd = lo_fi_fd(req, fi_out); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); + +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { + fuse_reply_err(req, -errno); + } else { +@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + off_t res; + + (void)ino; +- res = lseek(fi->fh, off, whence); ++ res = lseek(lo_fi_fd(req, fi), off, whence); + if (res != -1) { + fuse_reply_lseek(req, res); + } else { +@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) + root_elem->inode = &lo.root; + + lo_map_init(&lo.dirp_map); ++ lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; +@@ -1758,6 +1829,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch new file mode 100644 index 0000000..ba8b730 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch @@ -0,0 +1,395 @@ +From 
d81396cc3d9815730903b0755c9d2e67d6954d54 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:14 +0100 +Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide + lo_inode pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-40-dgilbert@redhat.com> +Patchwork-id: 93493 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_inode pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- + 1 file changed, 114 insertions(+), 30 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e83a976..a3ebf74 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -57,8 +57,8 @@ + #include "passthrough_helpers.h" + + /* +- * We are re-using pointers to our `struct lo_inode` and `struct +- * lo_dirp` elements as inodes. This means that we must be able to ++ * We are re-using pointers to our `struct lo_inode` ++ * elements as inodes. This means that we must be able to + * store uintptr_t values in a fuse_ino_t variable. The following + * incantation checks this condition at compile time. + */ +@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + + struct lo_map_elem { + union { +- /* Element values will go here... 
*/ ++ struct lo_inode *inode; + ssize_t freelist; + }; + bool in_use; +@@ -97,6 +97,7 @@ struct lo_inode { + ino_t ino; + dev_t dev; + uint64_t refcount; /* protected by lo->mutex */ ++ fuse_ino_t fuse_ino; + }; + + struct lo_cred { +@@ -121,6 +122,7 @@ struct lo_data { + int cache; + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_map ino_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + +-__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; + map->nelems = 0; + map->freelist = -1; + } + +-__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++static void lo_map_destroy(struct lo_map *map) + { + free(map->elems); + } +@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) + return 1; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_alloc_elem(struct lo_map *map) ++static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) + { + struct lo_map_elem *elem; + +@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) + return elem; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_reserve(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) + { + ssize_t *prev; + +@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) + return NULL; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_get(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) + { + if (key >= map->nelems) { + return NULL; +@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) + return &map->elems[key]; + } + +-__attribute__((unused)) static void lo_map_remove(struct lo_map *map, +- size_t key) 
++static void lo_map_remove(struct lo_map *map, size_t key) + { + struct lo_map_elem *elem; + +@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, + map->freelist = key; + } + ++/* Assumes lo->mutex is held */ ++static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->inode = inode; ++ return elem - lo_data(req)->ino_map.elems; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) { +- return &lo_data(req)->root; +- } else { +- return (struct lo_inode *)(uintptr_t)ino; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->ino_map, ino); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return NULL; + } ++ ++ return elem->inode; + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ struct lo_inode *inode = lo_inode(req, ino); ++ return inode ? 
inode->fd : -1; + } + + static bool lo_debug(fuse_req_t req) +@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; ++ struct lo_inode *inode; ++ int ifd; + int res; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ ifd = inode->fd; ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { + res = fchmod(fi->fh, attr->st_mode); +@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); ++ inode->fuse_ino = lo_add_inode_mapping(req, inode); + prev = &lo->root; + next = prev->next; + next->prev = inode; +@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + prev->next = inode; + pthread_mutex_unlock(&lo->mutex); + } +- e->ino = (uintptr_t)inode; ++ e->ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; +- struct lo_inode *dir = lo_inode(req, parent); ++ struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ dir = lo_inode(req, parent); ++ if (!dir) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOMEM; + + saverr = lo_change_cred(req, &old); +@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; +@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_lock(&lo->mutex); + inode->refcount++; + pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t)inode; ++ e.ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + next->prev = prev; + prev->next = next; + ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +- + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ return; ++ } + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char 
*value, size_t size, int flags) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1400,10 +1464,16 @@ out: + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) + struct fuse_session *se; + struct fuse_cmdline_opts opts; + struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_map_elem *root_elem; + int ret = -1; + + /* Don't mask creation mode, kernel already did that */ +@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) + pthread_mutex_init(&lo.mutex, NULL); + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; ++ lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_NORMAL; + ++ /* ++ * Set up the ino map like this: ++ * [0] Reserved (will not be used) ++ * [1] Root inode ++ */ ++ lo_map_init(&lo.ino_map); ++ lo_map_reserve(&lo.ino_map, 0)->in_use = false; ++ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); ++ root_elem->inode = &lo.root; ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1628,6 +1710,8 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.ino_map); ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch new file mode 100644 index 0000000..4751f95 --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch @@ -0,0 +1,182 @@ +From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:13 +0100 +Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh + indirection +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-39-dgilbert@redhat.com> +Patchwork-id: 93492 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +A layer of indirection is needed because passthrough_ll cannot expose +pointers or file descriptor numbers to untrusted clients. Malicious +clients could send invalid pointers or file descriptors in order to +crash or exploit the file system daemon. + +lo_map provides an integer key->value mapping. This will be used for +ino and fh fields in the patches that follow. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 124 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5e06179..e83a976 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + }; + #endif + ++struct lo_map_elem { ++ union { ++ /* Element values will go here... 
*/ ++ ssize_t freelist; ++ }; ++ bool in_use; ++}; ++ ++/* Maps FUSE fh or ino values to internal objects */ ++struct lo_map { ++ struct lo_map_elem *elems; ++ size_t nelems; ++ ssize_t freelist; ++}; ++ + struct lo_inode { + struct lo_inode *next; /* protected by lo->mutex */ + struct lo_inode *prev; /* protected by lo->mutex */ +@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++{ ++ map->elems = NULL; ++ map->nelems = 0; ++ map->freelist = -1; ++} ++ ++__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++{ ++ free(map->elems); ++} ++ ++static int lo_map_grow(struct lo_map *map, size_t new_nelems) ++{ ++ struct lo_map_elem *new_elems; ++ size_t i; ++ ++ if (new_nelems <= map->nelems) { ++ return 1; ++ } ++ ++ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); ++ if (!new_elems) { ++ return 0; ++ } ++ ++ for (i = map->nelems; i < new_nelems; i++) { ++ new_elems[i].freelist = i + 1; ++ new_elems[i].in_use = false; ++ } ++ new_elems[new_nelems - 1].freelist = -1; ++ ++ map->elems = new_elems; ++ map->freelist = map->nelems; ++ map->nelems = new_nelems; ++ return 1; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_alloc_elem(struct lo_map *map) ++{ ++ struct lo_map_elem *elem; ++ ++ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { ++ return NULL; ++ } ++ ++ elem = &map->elems[map->freelist]; ++ map->freelist = elem->freelist; ++ ++ elem->in_use = true; ++ ++ return elem; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_reserve(struct lo_map *map, size_t key) ++{ ++ ssize_t *prev; ++ ++ if (!lo_map_grow(map, key + 1)) { ++ return NULL; ++ } ++ ++ for (prev = &map->freelist; *prev != -1; ++ prev = &map->elems[*prev].freelist) { ++ if (*prev == key) { ++ struct lo_map_elem *elem = &map->elems[key]; ++ ++ *prev = elem->freelist; 
++ elem->in_use = true; ++ return elem; ++ } ++ } ++ return NULL; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_get(struct lo_map *map, size_t key) ++{ ++ if (key >= map->nelems) { ++ return NULL; ++ } ++ if (!map->elems[key].in_use) { ++ return NULL; ++ } ++ return &map->elems[key]; ++} ++ ++__attribute__((unused)) static void lo_map_remove(struct lo_map *map, ++ size_t key) ++{ ++ struct lo_map_elem *elem; ++ ++ if (key >= map->nelems) { ++ return; ++ } ++ ++ elem = &map->elems[key]; ++ if (!elem->in_use) { ++ return; ++ } ++ ++ elem->in_use = false; ++ ++ elem->freelist = map->freelist; ++ map->freelist = key; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + if (ino == FUSE_ROOT_ID) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch new file mode 100644 index 0000000..a3f7970 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch @@ -0,0 +1,52 @@ +From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:45 +0100 +Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-71-dgilbert@redhat.com> +Patchwork-id: 93531 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 98114a3..18d69ab 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + if (flags) { ++#ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); ++#else ++ res = syscall(SYS_renameat2, lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname, flags); ++ if (res == -1 && errno == ENOSYS) { ++ fuse_reply_err(req, EINVAL); ++ } else { ++ fuse_reply_err(req, res == -1 ? errno : 0); ++ } ++#endif + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch new file mode 100644 index 0000000..dc87ef2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch @@ -0,0 +1,138 @@ +From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:51 +0100 +Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related + options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-77-dgilbert@redhat.com> +Patchwork-id: 93530 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + + - Rename "cache=never" to "cache=none" to match 9p's similar option. 
+ + - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" + option. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 ++++- + tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- + 2 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 14f5d70..5672024 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o cache= cache mode. could be one of \"auto, " ++ "always, none\"\n" ++ " default: auto\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +- "cache=never\n" ++ "cache=none\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9e7191e..b40f287 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -101,8 +101,8 @@ struct lo_cred { + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, ++ CACHE_NONE, ++ CACHE_AUTO, + CACHE_ALWAYS, + }; + +@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, + { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, +@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; +@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + } + + fi->fh = fh; +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; +- lo.cache = CACHE_NORMAL; ++ lo.cache = CACHE_AUTO; + + /* + * Set up the ino map like this: +@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) + } + if (!lo.timeout_set) { + switch (lo.cache) { +- case CACHE_NEVER: ++ case CACHE_NONE: + lo.timeout = 0.0; + break; + +- case CACHE_NORMAL: ++ case CACHE_AUTO: + lo.timeout = 1.0; + break; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch new file mode 100644 
index 0000000..c55eead --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch @@ -0,0 +1,154 @@ +From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:13 +0000 +Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-7-dgilbert@redhat.com> +Patchwork-id: 94125 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +This is a cleanup patch to simplify the following xattr fix and +there are no functional changes. + +- Move memory allocation to head of the function +- Unify fgetxattr/flistxattr call for both size == 0 and + size != 0 case +- Remove redundant lo_inode_put call in error path + (Note: second call is ignored now since @inode is already NULL) + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) +Signed-off-by: Danilo C. L.
de Paula +--- + tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ + 1 file changed, 22 insertions(+), 32 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c635fc8..50c7273 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = fgetxattr(fd, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = fgetxattr(fd, name, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = fgetxattr(fd, name, NULL, 0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2242,7 +2238,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = flistxattr(fd, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = flistxattr(fd, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = flistxattr(fd, NULL, 
0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2320,7 +2311,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch new file mode 100644 index 0000000..98d00fc --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch @@ -0,0 +1,79 @@ +From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:47 +0100 +Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-73-dgilbert@redhat.com> +Patchwork-id: 93524 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 4 ++++ + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 6d50a46..14f5d70 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o readdirplus|no_readdirplus\n" ++ " enable/disable readirplus\n" ++ " default: readdirplus except with " ++ "cache=never\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6480c51..8b1784f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -117,6 +117,8 @@ struct lo_data { + double timeout; + int cache; + int timeout_set; ++ int readdirplus_set; ++ int readdirplus_clear; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ +@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, ++ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, ++ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + FUSE_OPT_END + }; + static bool use_syslog = false; +@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if (lo->cache == CACHE_NEVER) { ++ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, 
"lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch new file mode 100644 index 0000000..4b02779 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch @@ -0,0 +1,198 @@ +From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:12 +0100 +Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in + caller's context +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-38-dgilbert@redhat.com> +Patchwork-id: 93488 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +We need to create files in the caller's context. Otherwise after +creating a file, the caller might not be able to do file operations on +that file. + +Changed effective uid/gid to caller's uid/gid, create file and then +switch back to uid/gid 0. + +Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID +in all threads, which is not what we want. + +Signed-off-by: Vivek Goyal +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- + 1 file changed, 91 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index cd27c09..5e06179 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -83,6 +84,11 @@ struct lo_inode { + uint64_t refcount; /* protected by lo->mutex */ + }; + ++struct lo_cred { ++ uid_t euid; ++ gid_t egid; ++}; ++ + enum { + CACHE_NEVER, + CACHE_NORMAL, +@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + } + } + ++/* ++ * On some archs, setres*id is limited to 2^16 but they ++ * provide setres*id32 variants that allow 2^32. ++ * Others just let setres*id do 2^32 anyway. ++ */ ++#ifdef SYS_setresgid32 ++#define OURSYS_setresgid SYS_setresgid32 ++#else ++#define OURSYS_setresgid SYS_setresgid ++#endif ++ ++#ifdef SYS_setresuid32 ++#define OURSYS_setresuid SYS_setresuid32 ++#else ++#define OURSYS_setresuid SYS_setresuid ++#endif ++ ++/* ++ * Change to uid/gid of caller so that file is created with ++ * ownership of caller. ++ * TODO: What about selinux context? 
++ */ ++static int lo_change_cred(fuse_req_t req, struct lo_cred *old) ++{ ++ int res; ++ ++ old->euid = geteuid(); ++ old->egid = getegid(); ++ ++ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); ++ if (res == -1) { ++ return errno; ++ } ++ ++ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); ++ if (res == -1) { ++ int errno_save = errno; ++ ++ syscall(OURSYS_setresgid, -1, old->egid, -1); ++ return errno_save; ++ } ++ ++ return 0; ++} ++ ++/* Regain Privileges */ ++static void lo_restore_cred(struct lo_cred *old) ++{ ++ int res; ++ ++ res = syscall(OURSYS_setresuid, -1, old->euid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); ++ exit(1); ++ } ++ ++ res = syscall(OURSYS_setresgid, -1, old->egid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); ++ exit(1); ++ } ++} ++ + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + const char *name, mode_t mode, dev_t rdev, + const char *link) +@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + int saverr; + struct lo_inode *dir = lo_inode(req, parent); + struct fuse_entry_param e; ++ struct lo_cred old = {}; + + saverr = ENOMEM; + ++ saverr = lo_change_cred(req, &old); ++ if (saverr) { ++ goto out; ++ } ++ + res = mknod_wrapper(dir->fd, name, link, mode, rdev); + + saverr = errno; ++ ++ lo_restore_cred(&old); ++ + if (res == -1) { + goto out; + } +@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct fuse_entry_param e; + int err; ++ struct lo_cred old = {}; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", + parent, name); + } + ++ err = lo_change_cred(req, &old); ++ if (err) { ++ goto out; ++ } ++ + fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +- if (fd == -1) { +- return (void)fuse_reply_err(req, 
errno); +- } ++ err = fd == -1 ? errno : 0; ++ lo_restore_cred(&old); + +- fi->fh = fd; ++ if (!err) { ++ fi->fh = fd; ++ err = lo_do_lookup(req, parent, name, &e); ++ } + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } + +- err = lo_do_lookup(req, parent, name, &e); ++out: + if (err) { + fuse_reply_err(req, err); + } else { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch new file mode 100644 index 0000000..4a531a3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch @@ -0,0 +1,50 @@ +From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:46 +0100 +Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on + cache=never +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-72-dgilbert@redhat.com> +Patchwork-id: 93525 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +...because the attributes sent in the READDIRPLUS reply would be discarded +anyway. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 18d69ab..6480c51 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ if (lo->cache == CACHE_NEVER) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); ++ conn->want &= ~FUSE_CAP_READDIRPLUS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch new file mode 100644 index 0000000..00e11b4 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch @@ -0,0 +1,143 @@ +From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:10 +0100 +Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on + remove/rename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-96-dgilbert@redhat.com> +Patchwork-id: 93549 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c819b5f..e3a6d6b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1140,17 +1140,42 @@ out_err: + fuse_reply_err(req, saverr); + } + ++static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct stat attr; ++ ++ res = fstatat(lo_fd(req, parent), name, &attr, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return NULL; ++ } ++ ++ return lo_find(lo_data(req), &attr); ++} ++ + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); ++ + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; ++ struct lo_inode *oldinode; ++ struct lo_inode *newinode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ oldinode = lookup_name(req, parent, name); ++ newinode = lookup_name(req, newparent, newname); ++ ++ if (!oldinode) { ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ + if (flags) { + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); +@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_reply_err(req, res == -1 ? errno : 0); + } + #endif +- return; ++ goto out; + } + + res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + + fuse_reply_err(req, res == -1 ? errno : 0); ++out: ++ unref_inode_lolocked(lo, oldinode, 1); ++ unref_inode_lolocked(lo, newinode, 1); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch new file mode 100644 index 0000000..b0be1f9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch @@ -0,0 +1,211 @@ +From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:52 +0100 +Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-78-dgilbert@redhat.com> +Patchwork-id: 93528 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Improve performance of inode lookup by using a hash table. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ + 1 file changed, 45 insertions(+), 36 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b40f287..b176a31 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -84,13 +84,15 @@ struct lo_map { + ssize_t freelist; + }; + ++struct lo_key { ++ ino_t ino; ++ dev_t dev; ++}; ++ + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ + int fd; + bool is_symlink; +- ino_t ino; +- dev_t dev; ++ struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; + }; +@@ -119,7 +121,8 @@ struct lo_data { + int timeout_set; + int readdirplus_set; + int readdirplus_clear; +- struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_inode root; ++ GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ +@@ -573,7 +576,7 @@ retry: + } + goto fail_unref; + } +- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { + if (!retries) { + fuse_log(FUSE_LOG_WARNING, + "%s: failed to match last\n", __func__); +@@ -753,19 +756,20 @@ out_err: + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { + struct lo_inode *p; +- struct lo_inode *ret = NULL; ++ struct lo_key key = { ++ .ino = st->st_ino, ++ .dev = st->st_dev, ++ }; + + pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- 
} ++ p = g_hash_table_lookup(lo->inodes, &key); ++ if (p) { ++ assert(p->refcount > 0); ++ p->refcount++; + } + pthread_mutex_unlock(&lo->mutex); +- return ret; ++ ++ return p; + } + + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- struct lo_inode *prev, *next; +- + saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { +@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->refcount = 1; + inode->fd = newfd; + newfd = -1; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; ++ inode->key.ino = e->attr.st_ino; ++ inode->key.dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; ++ g_hash_table_insert(lo->inodes, &inode->key, inode); + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + assert(inode->refcount >= n); + inode->refcount -= n; + if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- + lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ g_hash_table_remove(lo->inodes, &inode->key); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + /* Hide root's parent directory */ + if (dinode == &lo->root && strcmp(name, "..") == 0) { +- e.attr.st_ino = lo->root.ino; ++ e.attr.st_ino = lo->root.key.ino; + e.attr.st_mode = DT_DIR << 12; + } + +@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) + + root->is_symlink = false; + root->fd = fd; +- root->ino = stat.st_ino; +- root->dev = stat.st_dev; ++ root->key.ino = stat.st_ino; ++ root->key.dev = stat.st_dev; + root->refcount = 2; + } + ++static guint lo_key_hash(gconstpointer key) ++{ ++ const struct lo_key *lkey = key; ++ ++ return (guint)lkey->ino + (guint)lkey->dev; ++} ++ ++static gboolean lo_key_equal(gconstpointer a, gconstpointer b) ++{ ++ const struct lo_key *la = a; ++ const struct lo_key *lb = b; ++ ++ return la->ino == lb->ino && la->dev == lb->dev; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) + umask(0); + + pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; ++ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_AUTO; +@@ -2522,6 +2528,9 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ if (lo.inodes) { ++ g_hash_table_destroy(lo.inodes); ++ } + lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch new file mode 100644 index 0000000..68eb03e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch @@ -0,0 +1,54 @@ +From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:24 +0100 +Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-50-dgilbert@redhat.com> +Patchwork-id: 93500 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e375406..79d5966 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; ++ struct lo_inode *inode, *dir = lo_inode(req, parent); + + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; + ++ /* Do not allow escaping root directory */ ++ if (dir == &lo->root && strcmp(name, "..") == 0) { ++ name = "."; ++ } ++ + newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch new file mode 100644 index 0000000..5f97cbf --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch @@ -0,0 +1,108 @@ +From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:25 +0100 +Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-51-dgilbert@redhat.com> +Patchwork-id: 93507 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Construct a fake dirent for the root directory's ".." entry. This hides +the parent directory from the FUSE client. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79d5966..e3d65c3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1149,19 +1149,25 @@ out_err: + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { ++ struct lo_data *lo = lo_data(req); + struct lo_dirp *d; ++ struct lo_inode *dinode; + char *buf = NULL; + char *p; + size_t rem = size; +- int err = ENOMEM; ++ int err = EBADF; + +- (void)ino; ++ dinode = lo_inode(req, ino); ++ if (!dinode) { ++ goto error; ++ } + + d = lo_dirp(req, fi); + if (!d) { + goto error; + } + ++ err = ENOMEM; + buf = calloc(1, size); + if (!buf) { + goto error; +@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + } + nextoff = d->entry->d_off; + name = d->entry->d_name; ++ + fuse_ino_t entry_ino = 0; ++ struct fuse_entry_param e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ ++ /* Hide root's parent directory */ ++ if (dinode == &lo->root && strcmp(name, "..") == 0) { ++ e.attr.st_ino = 
lo->root.ino; ++ e.attr.st_mode = DT_DIR << 12; ++ } ++ + if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param){ +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { ++ if (!is_dot_or_dotdot(name)) { + err = lo_do_lookup(req, ino, name, &e); + if (err) { + goto error; +@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); + } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); + } + if (entsize > rem) { + if (entry_ino != 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch new file mode 100644 index 0000000..be7c120 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch @@ -0,0 +1,103 @@ +From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:20 +0100 +Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-106-dgilbert@redhat.com> +Patchwork-id: 93562 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +When running with multiple threads it can be tricky to handle +FUSE_INIT/FUSE_DESTROY in parallel with other request types or in +parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that +malicious clients cannot trigger race conditions. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ + 2 files changed, 19 insertions(+) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index a20854f..1447d86 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -61,6 +61,7 @@ struct fuse_session { + struct fuse_req list; + struct fuse_req interrupts; + pthread_mutex_t lock; ++ pthread_rwlock_t init_rwlock; + int got_destroy; + int broken_splice_nonblock; + uint64_t notify_ctr; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index dab6a31..79a4031 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, + req->ctx.pid = in->pid; + req->ch = ch; + ++ /* ++ * INIT and DESTROY requests are serialized, all other request types ++ * run in parallel. This prevents races between FUSE_INIT and ordinary ++ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and ++ * FUSE_DESTROY and FUSE_DESTROY. 
++ */ ++ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || ++ in->opcode == FUSE_DESTROY) { ++ pthread_rwlock_wrlock(&se->init_rwlock); ++ } else { ++ pthread_rwlock_rdlock(&se->init_rwlock); ++ } ++ + err = EIO; + if (!se->got_init) { + enum fuse_opcode expected; +@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } ++ ++ pthread_rwlock_unlock(&se->init_rwlock); + return; + + reply_err: + fuse_reply_err(req, err); ++ pthread_rwlock_unlock(&se->init_rwlock); + } + + #define LL_OPTION(n, o, v) \ +@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) + se->op.destroy(se->userdata); + } + } ++ pthread_rwlock_destroy(&se->init_rwlock); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) { +@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + list_init_req(&se->list); + list_init_req(&se->interrupts); + fuse_mutex_init(&se->lock); ++ pthread_rwlock_init(&se->init_rwlock, NULL); + + memcpy(&se->op, op, op_size); + se->owner = getuid(); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch new file mode 100644 index 0000000..8eabede --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch @@ -0,0 +1,149 @@ +From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:05 +0100 +Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() + races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-91-dgilbert@redhat.com> +Patchwork-id: 93544 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +We call into libvhost-user from the virtqueue handler thread and the +vhost-user message processing thread without a lock. There is nothing +protecting the virtqueue handler thread if the vhost-user message +processing thread changes the virtqueue or memory table while it is +running. + +This patch introduces a read-write lock. Virtqueue handler threads are +readers. The vhost-user message processing thread is a writer. This +will allow concurrency for multiqueue in the future while protecting +against fv_queue_thread() vs virtio_loop() races. + +Note that the critical sections could be made smaller but it would be +more invasive and require libvhost-user changes. Let's start simple and +improve performance later, if necessary. Another option would be an +RCU-style approach with lighter-weight primitives. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fb8d6d1..f6242f9 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -59,6 +59,18 @@ struct fv_VuDev { + struct fuse_session *se; + + /* ++ * Either handle virtqueues or vhost-user protocol messages. Don't do ++ * both at the same time since that could lead to race conditions if ++ * virtqueues or memory tables change while another thread is accessing ++ * them. 
++ * ++ * The assumptions are: ++ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. ++ * 2. virtio_loop() reads/writes virtqueues and VuDev. ++ */ ++ pthread_rwlock_t vu_dispatch_rwlock; ++ ++ /* + * The following pair of fields are only accessed in the main + * virtio_loop + */ +@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) + qi->qidx, qi->kick_fd); + while (1) { + struct pollfd pf[2]; ++ int ret; ++ + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } ++ /* Mutual exclusion with virtio_loop() */ ++ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ assert(ret == 0); /* there is no possible error case */ + /* out is from guest, in is too guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + ++ + while (1) { + bool allocated_bufv = false; + struct fuse_bufvec bufv; +@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) + free(elem); + elem = NULL; + } ++ ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } + out: + pthread_mutex_destroy(&ch.lock); +@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) + + while (!fuse_session_exited(se)) { + struct pollfd pf[1]; ++ bool ok; ++ int ret; + pf[0].fd = se->vu_socketfd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) + } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); +- if (!vu_dispatch(&se->virtio_dev->dev)) { ++ /* Mutual exclusion with fv_queue_thread() */ ++ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); ++ 
assert(ret == 0); /* there is no possible error case */ ++ ++ ok = vu_dispatch(&se->virtio_dev->dev); ++ ++ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); ++ ++ if (!ok) { + fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); + break; + } +@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) + + se->vu_socketfd = data_sock; + se->virtio_dev->se = se; ++ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); + vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, + fv_remove_watch, &fv_iface); + +@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) + } + + free(se->virtio_dev->qi); ++ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch new file mode 100644 index 0000000..acafa41 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch @@ -0,0 +1,147 @@ +From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:07 +0100 +Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-93-dgilbert@redhat.com> +Patchwork-id: 93546 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause +use-after-free races with other threads that are accessing lo_dirp. + +Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with +itself. This prevents double-frees. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ + 1 file changed, 35 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 690edbc..2d703b5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + } + + struct lo_dirp { ++ gint refcount; + DIR *dp; + struct dirent *entry; + off_t offset; + }; + ++static void lo_dirp_put(struct lo_dirp **dp) ++{ ++ struct lo_dirp *d = *dp; ++ ++ if (!d) { ++ return; ++ } ++ *dp = NULL; ++ ++ if (g_atomic_int_dec_and_test(&d->refcount)) { ++ closedir(d->dp); ++ free(d); ++ } ++} ++ ++/* Call lo_dirp_put() on the return value when no longer needed */ + static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (elem) { ++ g_atomic_int_inc(&elem->dirp->refcount); ++ } + pthread_mutex_unlock(&lo->mutex); + if (!elem) { + return NULL; +@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + ++ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ + pthread_mutex_lock(&lo->mutex); + fh = lo_add_dirp_mapping(req, d); + pthread_mutex_unlock(&lo->mutex); +@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { + struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; ++ struct lo_dirp *d = NULL; + struct lo_inode 
*dinode; + char *buf = NULL; + char *p; +@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + err = 0; + error: ++ lo_dirp_put(&d); ++ + /* + * If there's an error, we can only signal it if we haven't stored + * any entries yet - otherwise we'd end up with wrong lookup +@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; + struct lo_dirp *d; + + (void)ino; + +- d = lo_dirp(req, fi); +- if (!d) { ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (!elem) { ++ pthread_mutex_unlock(&lo->mutex); + fuse_reply_err(req, EBADF); + return; + } + +- pthread_mutex_lock(&lo->mutex); ++ d = elem->dirp; + lo_map_remove(&lo->dirp_map, fi->fh); + pthread_mutex_unlock(&lo->mutex); + +- closedir(d->dp); +- free(d); ++ lo_dirp_put(&d); /* paired with lo_opendir() */ ++ + fuse_reply_err(req, 0); + } + +@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + } else { + res = fsync(fd); + } ++ ++ lo_dirp_put(&d); ++ + fuse_reply_err(req, res == -1 ? errno : 0); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch new file mode 100644 index 0000000..056559d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch @@ -0,0 +1,469 @@ +From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:37 +0100 +Subject: [PATCH 066/116] virtiofsd: print log only when priority is high + enough +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-63-dgilbert@redhat.com> +Patchwork-id: 93518 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Introduce "-o log_level=" command line option to specify current log +level (priority), valid values are "debug info warn err", e.g. + + ./virtiofsd -o log_level=debug ... + +So only log priority higher than "debug" will be printed to +stderr/syslog. And the default level is info. + +The "-o debug"/"-d" options are kept, and imply debug log level. + +Signed-off-by: Eryu Guan +dgilbert: Reworked for libfuse's log_func +Signed-off-by: Dr. David Alan Gilbert +with fix by: +Signed-off-by: Xiao Yang +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 8 ++- + tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- + 4 files changed, 87 insertions(+), 115 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 6ceb33d..a7a1968 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct fuse_out_header *out = iov[0].iov_base; + + out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, +- out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long)out->unique, out->error, +- strerror(-out->error), 
out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", +- (unsigned long long)out->unique, out->len); +- } ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); + } + + if (fuse_lowlevel_is_virtio(se)) { +@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, + return; + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long)arg->unique); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); + + req->u.i.unique = arg->unique; + +@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); + } + se->conn.proto_major = arg->major; + se->conn.proto_minor = arg->minor; +@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + outarg.congestion_threshold = se->conn.congestion_threshold; + outarg.time_gran = se->conn.time_gran; + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, +- outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); +- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); +- } ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + + send_reply_ok(req, &outarg, outargsize); + } +@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); + assert(in); /* caller guarantees the input buffer is large enough */ + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " +- "pid: %u\n", +- (unsigned long long)in->unique, +- opname((enum fuse_opcode)in->opcode), in->opcode, +- (unsigned long long)in->nodeid, buf->size, in->pid); +- } ++ fuse_log( ++ FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), ++ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); + + req = fuse_ll_alloc_req(se); + if (req == NULL) { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f2750bc..138041e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { + int show_help; + 
int print_capabilities; + int syslog; ++ int log_level; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9692ef9..6d50a46 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -34,7 +34,6 @@ + t, offsetof(struct fuse_cmdline_opts, p), v \ + } + +- + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), + FUSE_HELPER_OPT("--help", show_help), +@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_HELPER_OPT("--syslog", syslog), ++ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), ++ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), ++ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), ++ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), + FUSE_OPT_END + }; + +@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o log_level= log level, default to \"info\"\n" ++ " level could be one of \"debug, " ++ "info, warn, err\"\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0372aca..ff6910f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -37,6 +37,7 @@ + + #include "qemu/osdep.h" + #include "fuse_virtio.h" ++#include "fuse_log.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { + FUSE_OPT_END + }; + static bool use_syslog = false; ++static int current_log_level; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) + return inode ? inode->fd : -1; + } + +-static bool lo_debug(fuse_req_t req) +-{ +- return lo_data(req)->debug != 0; +-} +- + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + } + + if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } + if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } + } +@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + e->ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e->ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e->ino); + + return 0; + +@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + struct fuse_entry_param e; + int err; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + /* + * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export +@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + return; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, +- (unsigned long long)nlookup); +- } ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); + + unref_inode(lo, inode, nlookup); + } +@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + int err; + struct lo_cred old = {}; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); +@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + char buf[64]; + struct lo_data *lo = lo_data(req); + +- if (lo_debug(req)) { +- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, +- fi->flags); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); + + /* + * With writeback cache, kernel may send read requests even +@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + { + struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", +- ino, size, (unsigned long)offset); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, +- out_buf.buf[0].size, (unsigned long)off); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); + + /* + * If kill_priv is set, drop CAP_FSETID which should lead to kernel +@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, +- size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to getxattr on symlink. 
*/ +@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, ++ size); + + if (inode->is_symlink) { + /* Sorry, no race free way to listxattr on symlink. */ +@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ++ ", name=%s value=%s size=%zd)\n", ino, name, value, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, ++ name); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ if (current_log_level < level) { ++ return; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) + return 1; + } + ++ /* ++ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, ++ * and we don't use this log level). 
++ */ ++ if (opts.log_level != 0) { ++ current_log_level = opts.log_level; ++ } + lo.debug = opts.debug; ++ if (lo.debug) { ++ current_log_level = FUSE_LOG_DEBUG; ++ } + lo.root.refcount = 2; ++ + if (lo.source) { + struct stat stat; + int res; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch new file mode 100644 index 0000000..87fff99 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch @@ -0,0 +1,533 @@ +From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:19 +0100 +Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-105-dgilbert@redhat.com> +Patchwork-id: 93554 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce a thread pool so that fv_queue_thread() just pops +VuVirtqElements and hands them to the thread pool. For the time being +only one worker thread is allowed since passthrough_ll.c is not +thread-safe yet. Future patches will lift this restriction so that +multiple FUSE requests can be processed in parallel. + +The main new concept is struct FVRequest, which contains both +VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, +fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of +fv_QueueInfo's fields are moved into FVRequest because they are +per-request. The name FVRequest conforms to QEMU coding style and I +expect the struct fv_* types will be renamed in a future refactoring. + +This patch series is not optimal. 
fbuf reuse is dropped so each request +does malloc(se->bufsize), but there is no clean and cheap way to keep +this with a thread pool. The vq_lock mutex is held for longer than +necessary, especially during the eventfd_write() syscall. Performance +can be improved in the future. + +prctl(2) had to be added to the seccomp whitelist because glib invokes +it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- + 1 file changed, 201 insertions(+), 158 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f6242f9..0dcf2ef 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -22,6 +22,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -37,17 +38,28 @@ + struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; ++ /* ++ * This lock protects the VuVirtq preventing races between ++ * fv_queue_thread() and fv_queue_worker(). 
++ */ ++ pthread_mutex_t vq_lock; ++ + struct fv_VuDev *virtio_dev; + + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; + int kill_fd; /* For killing the thread */ ++}; + +- /* The element for the command currently being processed */ +- VuVirtqElement *qe; ++/* A FUSE request */ ++typedef struct { ++ VuVirtqElement elem; ++ struct fuse_chan ch; ++ ++ /* Used to complete requests that involve no reply */ + bool reply_sent; +-}; ++} FVRequest; + + /* + * We pass the dev element into libvhost-user +@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count) + { +- VuVirtqElement *elem; +- VuVirtq *q; ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; + + assert(count >= 1); +@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + + /* unique == 0 is notification, which we don't support */ + assert(out->unique); +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); +- ch->qi->reply_sent = true; ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ ++ req->reply_sent = true; + + err: + return ret; +@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count, struct fuse_bufvec *buf, + size_t len) + { ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; +- VuVirtqElement *elem; +- VuVirtq *q; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + /* unique == 0 is notification which we don't support */ + assert(out->unique); + +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + ret = 0; + +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + + err: + if (ret == 0) { +- ch->qi->reply_sent = true; ++ req->reply_sent = true; + } + + return ret; + } + ++/* Process one FVRequest in a thread pool */ ++static void fv_queue_worker(gpointer data, gpointer user_data) ++{ ++ struct fv_QueueInfo *qi = user_data; ++ struct fuse_session *se = qi->virtio_dev->se; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ FVRequest *req = data; ++ VuVirtqElement *elem = &req->elem; ++ 
struct fuse_buf fbuf = {}; ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ ++ /* ++ * An element contains one request and the space to send our response ++ * They're spread over multiple descriptors in a scatter/gather set ++ * and we can't trust the guest to keep them still; so copy in/out. ++ */ ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ ++ fuse_mutex_init(&req->ch.lock); ++ req->ch.fd = -1; ++ req->ch.qi = qi; ++ ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", ++ __func__, elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, ++ elem->index); ++ assert(0); /* TODO */ ++ } ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ pbufv = NULL; /* Compiler thinks an unitialised path */ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. 
++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header afte rthe fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ allocated_bufv = true; ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; ++ ++ /* TODO! 
Endianness of header */ ++ ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &req->ch); ++ ++out: ++ if (allocated_bufv) { ++ free(pbufv); ++ } ++ ++ /* If the request has no reply, still recycle the virtqueue element */ ++ if (!req->reply_sent) { ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, ++ elem->index); ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ } ++ ++ pthread_mutex_destroy(&req->ch.lock); ++ free(fbuf.mem); ++ free(req); ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); +- struct fuse_session *se = qi->virtio_dev->se; +- struct fuse_chan ch; +- struct fuse_buf fbuf; ++ GThreadPool *pool; + +- fbuf.mem = NULL; +- fbuf.flags = 0; +- +- fuse_mutex_init(&ch.lock); +- ch.fd = (int)0xdaff0d111; +- ch.qi = qi; ++ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, ++ TRUE, NULL); ++ if (!pool) { ++ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); ++ return NULL; ++ } + + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); +@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) + /* Mutual exclusion with virtio_loop() */ + ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); + assert(ret == 0); /* there is no possible error case */ ++ pthread_mutex_lock(&qi->vq_lock); + /* out is from guest, in is too 
guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + +- + while (1) { +- bool allocated_bufv = false; +- struct fuse_bufvec bufv; +- struct fuse_bufvec *pbufv; +- +- /* +- * An element contains one request and the space to send our +- * response They're spread over multiple descriptors in a +- * scatter/gather set and we can't trust the guest to keep them +- * still; so copy in/out. +- */ +- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); +- if (!elem) { ++ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); ++ if (!req) { + break; + } + +- qi->qe = elem; +- qi->reply_sent = false; ++ req->reply_sent = false; + +- if (!fbuf.mem) { +- fbuf.mem = malloc(se->bufsize); +- assert(fbuf.mem); +- assert(se->bufsize > sizeof(struct fuse_in_header)); +- } +- /* The 'out' part of the elem is from qemu */ +- unsigned int out_num = elem->out_num; +- struct iovec *out_sg = elem->out_sg; +- size_t out_len = iov_size(out_sg, out_num); +- fuse_log(FUSE_LOG_DEBUG, +- "%s: elem %d: with %d out desc of length %zd\n", __func__, +- elem->index, out_num, out_len); +- +- /* +- * The elem should contain a 'fuse_in_header' (in to fuse) +- * plus the data based on the len in the header. 
+- */ +- if (out_len < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- if (out_len > se->bufsize) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- /* Copy just the first element and look at it */ +- copy_from_iov(&fbuf, 1, out_sg); +- +- if (out_num > 2 && +- out_sg[0].iov_len == sizeof(struct fuse_in_header) && +- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && +- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { +- /* +- * For a write we don't actually need to copy the +- * data, we can just do it straight out of guest memory +- * but we must still copy the headers in case the guest +- * was nasty and changed them while we were using them. +- */ +- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); +- +- /* copy the fuse_write_in header after the fuse_in_header */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; +- +- /* Allocate the bufv, with space for the rest of the iov */ +- allocated_bufv = true; +- pbufv = malloc(sizeof(struct fuse_bufvec) + +- sizeof(struct fuse_buf) * (out_num - 2)); +- if (!pbufv) { +- vu_queue_unpop(dev, q, elem, 0); +- free(elem); +- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", +- __func__); +- goto out; +- } +- +- pbufv->count = 1; +- pbufv->buf[0] = fbuf; +- +- size_t iovindex, pbufvindex; +- iovindex = 2; /* 2 headers, separate iovs */ +- pbufvindex = 1; /* 2 headers, 1 fusebuf */ +- +- for (; iovindex < out_num; iovindex++, pbufvindex++) { +- pbufv->count++; +- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ +- pbufv->buf[pbufvindex].flags = 0; +- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; +- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; +- } +- } else { +- /* Normal (non fast write) 
path */ +- +- /* Copy the rest of the buffer */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_len; +- +- /* TODO! Endianness of header */ +- +- /* TODO: Add checks for fuse_session_exited */ +- bufv.buf[0] = fbuf; +- bufv.count = 1; +- pbufv = &bufv; +- } +- pbufv->idx = 0; +- pbufv->off = 0; +- fuse_session_process_buf_int(se, pbufv, &ch); +- +- if (allocated_bufv) { +- free(pbufv); +- } +- +- if (!qi->reply_sent) { +- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +- __func__, elem->index); +- /* I think we've still got to recycle the element */ +- vu_queue_push(dev, q, elem, 0); +- vu_queue_notify(dev, q); +- } +- qi->qe = NULL; +- free(elem); +- elem = NULL; ++ g_thread_pool_push(pool, req, NULL); + } + ++ pthread_mutex_unlock(&qi->vq_lock); + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } +-out: +- pthread_mutex_destroy(&ch.lock); +- free(fbuf.mem); ++ ++ g_thread_pool_free(pool, FALSE, TRUE); + + return NULL; + } +@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", + __func__, qidx, ret); + } ++ pthread_mutex_destroy(&ourqi->vq_lock); + close(ourqi->kill_fd); + ourqi->kick_fd = -1; + free(vud->qi[qidx]); +@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + + ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); + assert(ourqi->kill_fd != -1); ++ pthread_mutex_init(&ourqi->vq_lock, NULL); ++ + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch new file mode 100644 index 0000000..181e32d --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch @@ -0,0 +1,159 @@ +From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:46 +0100 +Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-12-dgilbert@redhat.com> +Patchwork-id: 93466 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Classic FUSE file system daemons take a mountpoint argument but +virtiofsd exposes a vhost-user UNIX domain socket instead. The +mountpoint argument is not used by virtiofsd but the user is still +required to pass a dummy argument on the command-line. + +Remove the mountpoint argument to clean up the command-line. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + tools/virtiofsd/fuse_lowlevel.h | 4 +--- + tools/virtiofsd/helper.c | 20 +++----------------- + tools/virtiofsd/passthrough_ll.c | 12 ++---------- + 4 files changed, 7 insertions(+), 31 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5c9cb52..2f32c68 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2455,7 +2455,7 @@ out1: + return NULL; + } + +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++int fuse_session_mount(struct fuse_session *se) + { + int fd; + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index adb9054..8d8909b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { + int foreground; + int debug; + int nodefault_subtype; +- char *mountpoint; + int show_version; + int show_help; + unsigned int max_idle_threads; +@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /** + * Mount a FUSE file system. + * +- * @param mountpoint the mount point path + * @param se session object + * + * @return 0 on success, -1 on failure. + **/ +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++int fuse_session_mount(struct fuse_session *se); + + /** + * Enter a single threaded, blocking event loop. 
+diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5711dd2..5e6f205 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) + static int fuse_helper_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) + { ++ (void)data; + (void)outargs; +- struct fuse_cmdline_opts *opts = data; + + switch (key) { + case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, +- strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; + + default: + /* Pass through unknown options */ +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c5850ef..9377718 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) + return 1; + } + if (opts.show_help) { +- printf("usage: %s [options] \n\n", argv[0]); ++ printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); + fuse_lowlevel_help(); + ret = 0; +@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) + goto err_out1; + } + +- if (opts.mountpoint == NULL) { +- printf("usage: %s [options] \n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { + return 1; + } +@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) + goto err_out2; + } + +- if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ if (fuse_session_mount(se) != 0) { + goto err_out3; + } + +@@ -1393,7 
+1386,6 @@ err_out3: + err_out2: + fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); + fuse_opt_free_args(&args); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch new file mode 100644 index 0000000..98fb968 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch @@ -0,0 +1,294 @@ +From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:47 +0100 +Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-13-dgilbert@redhat.com> +Patchwork-id: 93467 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Notify reply support is unused by virtiofsd. The code would need to be +updated to validate input buffer sizes. Remove this unused code since +changes to it are untestable. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 47 ------------- + 2 files changed, 1 insertion(+), 193 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2f32c68..eb0ec49 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -31,12 +31,6 @@ + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) \ +- ({ \ +- const typeof(((type *)0)->member) *__mptr = (ptr); \ +- (type *)((char *)__mptr - offsetof(type, member)); \ +- }) +- + struct fuse_pollhandle { + uint64_t kh; + struct fuse_session *se; +@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, NULL, 0); + } + +-static void list_del_nreq(struct fuse_notify_req *nreq) +-{ +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; +-} +- +-static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) +-{ +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; +-} +- +-static void list_init_nreq(struct fuse_notify_req *nreq) +-{ +- nreq->next = nreq; +- nreq->prev = nreq; +-} +- +-static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; +- +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } 
+- pthread_mutex_unlock(&se->lock); +- +- if (nreq != head) { +- nreq->reply(nreq, req, nodeid, inarg, buf); +- } +-} +- + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return res; + } + +-struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; +-}; +- +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, +- fuse_ino_t ino, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } +-out: +- free(rreq); +-} +- +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) +-{ +- struct fuse_notify_retrieve_out outarg; +- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; +- +- if (!se) { +- return -EINVAL; +- } +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) { +- return -ENOMEM; +- } +- +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = 
fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); +- +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } +- +- return err; +-} +- + void *fuse_req_userdata(fuse_req_t req) + { + return req->se->userdata; +@@ -2226,7 +2085,7 @@ static struct { + [FUSE_POLL] = { do_poll, "POLL" }, + [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, + [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, + [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, + [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, inarg, buf); +- } else if (in->opcode == FUSE_NOTIFY_REPLY) { +- do_notify_reply(req, in->nodeid, inarg, buf); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + + list_init_req(&se->list); + list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + + memcpy(&se->op, op, op_size); +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 8d8909b..12a84b4 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { + off_t off, struct fuse_file_info *fi); + + 
/** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** + * Forget about multiple inodes + * + * See description of the forget function for more +@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +-/** +- * Retrieve data from the kernel buffers +- * +- * Retrieve data in the kernel buffers belonging to the given inode. +- * If successful then the retrieve_reply() method will be called with +- * the returned data. +- * +- * Only present pages are returned in the retrieve reply. Retrieving +- * stops when it finds a non-present page and only data prior to that +- * is returned. +- * +- * If this function returns an error, then the retrieve will not be +- * completed and no reply will be sent. +- * +- * This function doesn't change the dirty state of pages in the kernel +- * buffer. For dirty pages the write() method will be called +- * regardless of having been retrieved previously. +- * +- * Added in FUSE protocol version 7.15. If the kernel does not support +- * this (or a newer) version, the function will return -ENOSYS and do +- * nothing. 
+- * +- * @param se the session object +- * @param ino the inode number +- * @param size the number of bytes to retrieve +- * @param offset the starting offset into the file to retrieve from +- * @param cookie user data to supply to the reply callback +- * @return zero for success, -errno for failure +- */ +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); +- + + /* + * Utility functions +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch new file mode 100644 index 0000000..97a0db3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch @@ -0,0 +1,139 @@ +From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:08 +0100 +Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-94-dgilbert@redhat.com> +Patchwork-id: 93547 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +This reference counter plays a specific role in the FUSE protocol. It's +not a generic object reference counter and the FUSE kernel code calls it +"nlookup". + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 2d703b5..c819b5f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -99,7 +99,20 @@ struct lo_inode { + int fd; + bool is_symlink; + struct lo_key key; +- uint64_t refcount; /* protected by lo->mutex */ ++ ++ /* ++ * This counter keeps the inode alive during the FUSE session. ++ * Incremented when the FUSE inode number is sent in a reply ++ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is ++ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. ++ * ++ * Note that this value is untrusted because the client can manipulate ++ * it arbitrarily using FUSE_FORGET requests. ++ * ++ * Protected by lo->mutex. 
++ */ ++ uint64_t nlookup; ++ + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ +@@ -568,7 +581,7 @@ retry: + if (last == path) { + p = &lo->root; + pthread_mutex_lock(&lo->mutex); +- p->refcount++; ++ p->nlookup++; + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + pthread_mutex_lock(&lo->mutex); + p = g_hash_table_lookup(lo->inodes, &key); + if (p) { +- assert(p->refcount > 0); +- p->refcount++; ++ assert(p->nlookup > 0); ++ p->nlookup++; + } + pthread_mutex_unlock(&lo->mutex); + +@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; ++ inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; + inode->key.ino = e->attr.st_ino; +@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + } + + pthread_mutex_lock(&lo->mutex); +- inode->refcount++; ++ inode->nlookup++; + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + + pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { ++ assert(inode->nlookup >= n); ++ inode->nlookup -= n; ++ if (!inode->nlookup) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); + if (g_hash_table_size(inode->posix_locks)) { +@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + struct lo_inode *inode = value; + struct lo_data *lo = user_data; + +- inode->refcount = 0; ++ inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); + +@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) + } + + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)ino, (unsigned long long)inode->nlookup, + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); +@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +- root->refcount = 2; ++ root->nlookup = 2; + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch new file mode 100644 index 0000000..95858f8 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch @@ -0,0 +1,94 @@ +From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:48 +0100 +Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to + unref_inode_lolocked() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-74-dgilbert@redhat.com> +Patchwork-id: 93526 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 8b1784f..de12e75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { + }; + static bool use_syslog = false; + static int current_log_level; +- +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n); + + static struct { + pthread_mutex_t mutex; +@@ -586,7 +586,7 @@ retry: + return 0; + + fail_unref: +- unref_inode(lo, p, 1); ++ unref_inode_lolocked(lo, p, 1); + fail: + if (retries) { + retries--; +@@ -624,7 +624,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1027,7 +1027,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) + { + if (!inode) { + return; +@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)ino, (unsigned long long)inode->refcount, + (unsigned long long)nlookup); + +- unref_inode(lo, inode, nlookup); ++ unref_inode_lolocked(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch b/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch new file mode 100644 index 0000000..ab6f751 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch @@ -0,0 +1,166 @@ +From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:27 +0100 +Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-53-dgilbert@redhat.com> +Patchwork-id: 93504 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Use a mount namespace with the shared directory tree mounted at "/" and +no other mounts. + +This prevents symlink escape attacks because symlink targets are +resolved only against the shared directory and cannot go outside it. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Peng Tao +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e2e2211..0570453 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1943,6 +1944,58 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* This magic is based on lxc's lxc_pivot_root() */ ++static void setup_pivot_root(const char *source) ++{ ++ int oldroot; ++ int newroot; ++ ++ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (oldroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); ++ exit(1); ++ } ++ ++ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (newroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ if (syscall(__NR_pivot_root, ".", ".") < 0) { ++ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(oldroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); ++ exit(1); ++ } ++ ++ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); ++ exit(1); ++ } ++ ++ if (umount2(".", MNT_DETACH) < 0) { ++ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ close(newroot); ++ close(oldroot); ++} ++ + static void setup_proc_self_fd(struct lo_data *lo) + { + lo->proc_self_fd = open("/proc/self/fd", O_PATH); +@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) + } + } + ++/* 
++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. ++ */ ++static void setup_mount_namespace(const char *source) ++{ ++ if (unshare(CLONE_NEWNS) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ setup_pivot_root(source); ++} ++ ++/* ++ * Lock down this process to prevent access to other processes or files outside ++ * source directory. This reduces the impact of arbitrary code execution bugs. ++ */ ++static void setup_sandbox(struct lo_data *lo) ++{ ++ setup_mount_namespace(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) + } + + lo.root.fd = open(lo.source, O_PATH); ++ + if (lo.root.fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); + exit(1); +@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) + /* Must be after daemonize to get the right /proc/self/fd */ + setup_proc_self_fd(&lo); + ++ setup_sandbox(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch new file mode 100644 index 0000000..e54248c --- /dev/null +++ b/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch @@ -0,0 +1,93 @@ +From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:34 +0100 +Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-60-dgilbert@redhat.com> +Patchwork-id: 93516 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd can exceed the default open file descriptor limit easily on +most systems. Take advantage of the fact that it runs as root to raise +the limit. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index d53cb1e..c281d81 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + setup_seccomp(); + } + ++/* Raise the maximum number of open file descriptors */ ++static void setup_nofile_rlimit(void) ++{ ++ const rlim_t max_fds = 1000000; ++ struct rlimit rlim; ++ ++ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++ ++ if (rlim.rlim_cur >= max_fds) { ++ return; /* nothing to do */ ++ } ++ ++ rlim.rlim_cur = max_fds; ++ rlim.rlim_max = max_fds; ++ ++ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ /* Ignore SELinux 
denials */ ++ if (errno == EPERM) { ++ return; ++ } ++ ++ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ setup_nofile_rlimit(); ++ + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch b/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch new file mode 100644 index 0000000..ce74f4d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch @@ -0,0 +1,88 @@ +From 301f19f2ebd617e43e3a8e7bdcf694de580fe689 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 5 May 2020 16:35:56 +0100 +Subject: [PATCH 5/9] virtiofsd: stay below fs.file-max sysctl value + (CVE-2020-10717) + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200505163600.22956-4-dgilbert@redhat.com> +Patchwork-id: 96271 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 3/7] virtiofsd: stay below fs.file-max sysctl value (CVE-2020-10717) +Bugzilla: 1817445 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Michael S. Tsirkin + +From: Stefan Hajnoczi + +The system-wide fs.file-max sysctl value determines how many files can +be open. It defaults to a value calculated based on the machine's RAM +size. Previously virtiofsd would try to set RLIMIT_NOFILE to 1,000,000 +and this allowed the FUSE client to exhaust the number of open files +system-wide on Linux hosts with less than 10 GB of RAM! + +Take fs.file-max into account when choosing the default RLIMIT_NOFILE +value. + +Fixes: CVE-2020-10717 +Reported-by: Yuval Avrahami +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20200501140644.220940-3-stefanha@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 8c1d353d107b4fc344e27f2f08ea7fa25de2eea2) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/helper.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9b3eddc..5b222ea 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -176,7 +176,8 @@ void fuse_cmdline_help(void) + " default: no_xattr\n" + " --rlimit-nofile= set maximum number of file descriptors\n" + " (0 leaves rlimit unchanged)\n" +- " default: 1,000,000 if the current rlimit is lower\n" ++ " default: min(1000000, fs.file-max - 16384)\n" ++ " if the current rlimit is lower\n" + ); + } + +@@ -199,9 +200,32 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, + + static unsigned long get_default_rlimit_nofile(void) + { ++ g_autofree gchar *file_max_str = NULL; ++ const rlim_t reserved_fds = 16384; /* leave at least this many fds free */ + rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */ ++ rlim_t file_max; + struct rlimit rlim; + ++ /* ++ * Reduce max_fds below the system-wide maximum, if necessary. This ++ * ensures there are fds available for other processes so we don't ++ * cause resource exhaustion. 
++ */ ++ if (!g_file_get_contents("/proc/sys/fs/file-max", &file_max_str, ++ NULL, NULL)) { ++ fuse_log(FUSE_LOG_ERR, "can't read /proc/sys/fs/file-max\n"); ++ exit(1); ++ } ++ file_max = g_ascii_strtoull(file_max_str, NULL, 10); ++ if (file_max < 2 * reserved_fds) { ++ fuse_log(FUSE_LOG_ERR, ++ "The fs.file-max sysctl is too low (%lu) to allow a " ++ "reasonable number of open files.\n", ++ (unsigned long)file_max); ++ exit(1); ++ } ++ max_fds = MIN(file_max - reserved_fds, max_fds); ++ + if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { + fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); + exit(1); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch new file mode 100644 index 0000000..be6b244 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch @@ -0,0 +1,72 @@ +From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:25 +0100 +Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in + virtio_loop() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-111-dgilbert@redhat.com> +Patchwork-id: 93564 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE +request from VMM and shuts down virtqueues by calling fv_set_started(), +which joins fv_queue_thread() threads. So when virtio_loop() returns, +there should be no thread still accessing data in fuse session and/or +virtio dev. + +But on abnormal exit, e.g. 
guest got killed for whatever reason, +vhost-user socket is closed and virtio_loop() breaks out of the main loop +and returns to main(). But it's possible fv_queue_worker()s are still +working and accessing fuse session and virtio dev, which results in +crash or use-after-free. + +Fix it by stopping fv_queue_thread()s before virtio_loop() returns, +to make sure no one can access fuse session and virtio dev. + +Reported-by: Qingming Su +Signed-off-by: Eryu Guan +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 9f65823..80a6e92 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) + } + } + ++ /* ++ * Make sure all fv_queue_thread()s quit on exit, as we're about to ++ * free virtio dev and fuse session, no one should access them anymore. ++ */ ++ for (int i = 0; i < se->virtio_dev->nqueues; i++) { ++ if (!se->virtio_dev->qi[i]) { ++ continue; ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); ++ fv_queue_cleanup_thread(se->virtio_dev, i); ++ } ++ + fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch new file mode 100644 index 0000000..f595ffa --- /dev/null +++ b/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch @@ -0,0 +1,83 @@ +From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:54 +0100 +Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file + timestamp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-80-dgilbert@redhat.com> +Patchwork-id: 93535 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Jiufei Xue + +Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct +stat' which means support nanosecond resolution for the file timestamp +fields. + +Signed-off-by: Jiufei Xue +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) +Signed-off-by: Miroslav Rezanina +--- + configure | 16 ++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/configure b/configure +index 7831618..5120c14 100755 +--- a/configure ++++ b/configure +@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then + strchrnul=yes + fi + ++######################################### ++# check if we have st_atim ++ ++st_atim=no ++cat > $TMPC << EOF ++#include ++#include ++int main(void) { return offsetof(struct stat, st_atim); } ++EOF ++if compile_prog "" "" ; then ++ st_atim=yes ++fi ++ + ########################################## + # check if trace backend exists + +@@ -6919,6 +6932,9 @@ fi + if test "$strchrnul" = "yes" ; then + echo "HAVE_STRCHRNUL=y" >> $config_host_mak + fi ++if test "$st_atim" = "yes" ; then ++ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak ++fi + if test "$byteswap_h" = "yes" ; then + echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak + fi +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index f252baa..5c618ce 100644 
+--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -7,6 +7,7 @@ + */ + + #include ++#include "config-host.h" + + /* + * Versioned symbols cannot be used in some cases because it +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch new file mode 100644 index 0000000..1bae1bf --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch @@ -0,0 +1,82 @@ +From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:18 +0100 +Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace + fuse_buf_write for better performance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-104-dgilbert@redhat.com> +Patchwork-id: 93558 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +fuse_buf_writev() only handles the normal write in which src is buffer +and dest is fd. Specially if src buffer represents guest physical +address that can't be mapped by the daemon process, IO must be bounced +back to the VMM to do it by fuse_buf_copy(). + +Signed-off-by: Jun Piao +Suggested-by: Dr. David Alan Gilbert +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 37befeb..27c1377 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + +-__attribute__((unused)) + static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, + struct fuse_bufvec *in_buf) + { +@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { +- size_t copied = 0; ++ size_t copied = 0, i; + + if (dstv == srcv) { + return fuse_buf_size(dstv); + } + ++ /* ++ * use writev to improve bandwidth when all the ++ * src buffers already mapped by the daemon ++ * process ++ */ ++ for (i = 0; i < srcv->count; i++) { ++ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { ++ break; ++ } ++ } ++ if ((i == srcv->count) && (dstv->count == 1) && ++ (dstv->idx == 0) && ++ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { ++ dstv->buf[0].pos += dstv->off; ++ return fuse_buf_writev(&dstv->buf[0], srcv); ++ } ++ + for (;;) { + const struct fuse_buf *src = fuse_bufvec_current(srcv); + const struct fuse_buf *dst = fuse_bufvec_current(dstv); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch new file mode 100644 index 0000000..feffb5e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch @@ -0,0 +1,56 @@ +From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:04 +0100 +Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in + fuse_session_destroy() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-90-dgilbert@redhat.com> +Patchwork-id: 93540 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +vu_socket_path is NULL when --fd=FDNUM was used. Use +fuse_lowlevel_is_virtio() instead. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 70568d2..dab6a31 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) + close(se->fd); + } + +- if (se->vu_socket_path) { ++ if (fuse_lowlevel_is_virtio(se)) { + virtio_session_close(se); +- free(se->vu_socket_path); +- se->vu_socket_path = NULL; + } + ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ + free(se); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch new file mode 100644 index 0000000..f250ed7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch @@ -0,0 +1,390 @@ +From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:26 +0100 +Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-52-dgilbert@redhat.com> +Patchwork-id: 93506 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sandboxing will remove /proc from the mount namespace so we can no +longer build string paths into "/proc/self/fd/...". + +Keep an O_PATH file descriptor so we can still re-open fds via +/proc/self/fd. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- + 1 file changed, 103 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3d65c3..e2e2211 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -110,6 +110,9 @@ struct lo_data { + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ ++ ++ /* An O_PATH file descriptor to /proc/self/fd/ */ ++ int proc_self_fd; + }; + + static const struct fuse_opt lo_opts[] = { +@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + int res; + + retry: +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); + +- res = readlink(procname, path, PATH_MAX); ++ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
+ if (res < 0) { + fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); + goto fail_noretry; +@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + } + return res; + } +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return utimensat(AT_FDCWD, path, tv, 0); ++ return utimensat(lo->proc_self_fd, path, tv, 0); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = fchmod(fd, attr->st_mode); + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); ++ sprintf(procname, "%i", ifd); ++ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); + } + if (res == -1) { + goto out_err; +@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + } + if (valid & FUSE_SET_ATTR_SIZE) { ++ int truncfd; ++ + if (fi) { +- res = ftruncate(fd, attr->st_size); ++ truncfd = fd; + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); ++ sprintf(procname, "%i", ifd); ++ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (truncfd < 0) { ++ goto out_err; ++ } ++ } ++ ++ res = ftruncate(truncfd, attr->st_size); ++ if (!fi) { ++ saverr = errno; ++ close(truncfd); ++ errno = saverr; + } + if (res == -1) { + goto out_err; +@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + return res; + } + +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= 
~O_APPEND; + } + +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ sprintf(buf, "%i", lo_fd(req, ino)); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); + } +@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- (void)ino; + int fd; + char *buf; + +@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + (void *)fi); + + if (!fi) { +- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ struct lo_data *lo = lo_data(req); ++ ++ res = asprintf(&buf, "%i", lo_fd(req, ino)); + if (res == -1) { + return (void)fuse_reply_err(req, errno); + } + +- fd = open(buf, O_RDWR); ++ fd = openat(lo->proc_self_fd, buf, O_RDWR); + free(buf); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); +@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out_err; + } + +- ret = getxattr(procname, name, value, size); ++ ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, + + fuse_reply_buf(req, value, ret); + } else { +- ret = getxattr(procname, name, NULL, 0); ++ ret = fgetxattr(fd, name, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1655,11 +1681,13 @@ out: + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out_err; + } + +- ret = listxattr(procname, value, size); ++ ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + + fuse_reply_buf(req, value, ret); + } else { +- ret = listxattr(procname, NULL, 0); ++ ret = flistxattr(fd, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, int flags) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + 
inode = lo_inode(req, ino); + if (!inode) { +@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = setxattr(procname, name, value, size, flags); ++ ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = removexattr(procname, name); ++ ret = fremovexattr(fd, name); + saverr = ret == -1 ? 
errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + +@@ -1887,12 +1943,25 @@ static void print_capabilities(void) + printf("}\n"); + } + ++static void setup_proc_self_fd(struct lo_data *lo) ++{ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct fuse_session *se; + struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_data lo = { ++ .debug = 0, ++ .writeback = 0, ++ .proc_self_fd = -1, ++ }; + struct lo_map_elem *root_elem; + int ret = -1; + +@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be after daemonize to get the right /proc/self/fd */ ++ setup_proc_self_fd(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +@@ -2018,6 +2090,10 @@ err_out1: + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + ++ if (lo.proc_self_fd >= 0) { ++ close(lo.proc_self_fd); ++ } ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch new file mode 100644 index 0000000..d60a902 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch @@ -0,0 +1,137 @@ +From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:22 +0100 +Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in + do_write_buf() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-48-dgilbert@redhat.com> +Patchwork-id: 93501 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +There is a small change in behavior: if fuse_write_in->size doesn't +match the input buffer size then the request is failed. Previously +write requests with 1 fuse_buf element would truncate to +fuse_write_in->size. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7e10995..611e8b0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- struct fuse_bufvec *ibufv) ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; + struct fuse_bufvec *pbufv = ibufv; +@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + .buf[0] = ibufv->buf[0], + .count = 1, + }; +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; ++ size_t arg_size = sizeof(*arg); + struct fuse_file_info fi; + + memset(&fi, 0, sizeof(fi)); ++ ++ arg = fuse_mbuf_iter_advance(iter, arg_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ fi.lock_owner = 
arg->lock_owner; ++ fi.flags = arg->flags; + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + + if (ibufv->count == 1) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- tmpbufv.buf[0].mem = PARAM(arg); +- } +- tmpbufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); +- if (tmpbufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- tmpbufv.buf[0].size = arg->size; ++ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; ++ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; + pbufv = &tmpbufv; + } else { + /* +@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + ibufv->buf[0].size = 0; + } + ++ if (fuse_buf_size(pbufv) != arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, + struct fuse_chan *ch) + { + const struct fuse_buf *buf = bufv->buf; ++ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; + int err; + +- in = buf->mem; ++ /* The first buffer must be a memory buffer */ ++ assert(!(buf->flags & FUSE_BUF_IS_FD)); ++ ++ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); ++ assert(in); /* caller guarantees the input buffer is large enough */ + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, bufv); ++ do_write_buf(req, 
in->nodeid, &iter, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-validate-path-components.patch b/SOURCES/kvm-virtiofsd-validate-path-components.patch new file mode 100644 index 0000000..b35aed7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-validate-path-components.patch @@ -0,0 +1,164 @@ +From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:18 +0100 +Subject: [PATCH 047/116] virtiofsd: validate path components +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-44-dgilbert@redhat.com> +Patchwork-id: 93498 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Several FUSE requests contain single path components. A correct FUSE +client sends well-formed path components but there is currently no input +validation in case something went wrong or the client is malicious. + +Refuse ".", "..", and paths containing '/' when we expect a path +component. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 53 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ac380ef..e375406 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); ++} ++ ++/* Is `path` a single path component that is not "." or ".."? */ ++static int is_safe_path_component(const char *path) ++{ ++ if (strchr(path, '/')) { ++ return 0; ++ } ++ ++ return !is_dot_or_dotdot(path); ++} + + static struct lo_data *lo_data(fuse_req_t req) + { +@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + parent, name); + } + ++ /* ++ * Don't use is_safe_path_component(), allow "." and ".." for NFS export ++ * support. 
++ */ ++ if (strchr(name, '/')) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_do_lookup(req, parent, name, &e); + if (err) { + fuse_reply_err(req, err); +@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + dir = lo_inode(req, parent); + if (!dir) { + fuse_reply_err(req, EBADF); +@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + struct fuse_entry_param e; + int saverr; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + inode = lo_inode(req, ino); + if (!inode) { + fuse_reply_err(req, EBADF); +@@ -904,6 +938,10 @@ out_err: + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int res; + ++ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (flags) { + fuse_reply_err(req, EINVAL); + return; +@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? errno : 0); +@@ -1093,12 +1141,6 @@ out_err: + fuse_reply_err(req, error); + } + +-static int is_dot_or_dotdot(const char *name) +-{ +- return name[0] == '.' && +- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); +-} +- + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + parent, name); + } + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch new file mode 100644 index 0000000..20add81 --- /dev/null +++ b/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch @@ -0,0 +1,56 @@ +From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:52 +0100 +Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-18-dgilbert@redhat.com> +Patchwork-id: 93471 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure + script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) +2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. + +Signed-off-by: Xiao Yang +Signed-off-by: Dr. 
David Alan Gilbert + Merged from two of Xiao Yang's patches +(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 322a889..6c4da18 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + int err = EOPNOTSUPP; + (void)ino; + +-#ifdef HAVE_FALLOCATE ++#ifdef CONFIG_FALLOCATE + err = fallocate(fi->fh, mode, offset, length); + if (err < 0) { + err = errno; + } + +-#elif defined(HAVE_POSIX_FALLOCATE) ++#elif defined(CONFIG_POSIX_FALLOCATE) + if (mode) { + fuse_reply_err(req, EOPNOTSUPP); + return; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-x86.conf b/SOURCES/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/SOURCES/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. +# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/SOURCES/kvm-xhci-recheck-slot-status.patch b/SOURCES/kvm-xhci-recheck-slot-status.patch new file mode 100644 index 0000000..8bcbc2c --- /dev/null +++ b/SOURCES/kvm-xhci-recheck-slot-status.patch @@ -0,0 +1,77 @@ +From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:31 +0000 +Subject: [PATCH 5/5] xhci: recheck slot status +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200114202331.51831-3-dgilbert@redhat.com> +Patchwork-id: 93345 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: Gerd Hoffmann + +Factor out slot status check into a helper function. Add an additional +check after completing transfers. This is needed in case a guest +queues multiple transfers in a row and a device unplug happens while +qemu processes them. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20200107083606.12393-1-kraxel@redhat.com +(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) +Signed-off-by: Danilo C. L. de Paula +--- + hw/usb/hcd-xhci.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index d2b9744..646c78c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, + xhci_kick_epctx(epctx, streamid); + } + ++static bool xhci_slot_ok(XHCIState *xhci, int slotid) ++{ ++ return (xhci->slots[slotid - 1].uport && ++ xhci->slots[slotid - 1].uport->dev && ++ xhci->slots[slotid - 1].uport->dev->attached); ++} ++ + static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + { + XHCIState *xhci = epctx->xhci; +@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + + /* If the device has been detached, but the guest has not noticed this + yet the 2 above checks will succeed, but we must NOT continue */ +- if (!xhci->slots[epctx->slotid - 1].uport || +- !xhci->slots[epctx->slotid - 1].uport->dev || +- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { + return; + } + +@@ -1987,6 +1992,10 @@ static void 
xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + } else { + xhci_fire_transfer(xhci, xfer, epctx); + } ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { ++ /* surprise removal -> stop processing */ ++ break; ++ } + if (xfer->complete) { + /* update ring dequeue ptr */ + xhci_set_ep_state(xhci, epctx, stctx, epctx->state); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-xics-Don-t-deassert-outputs.patch b/SOURCES/kvm-xics-Don-t-deassert-outputs.patch new file mode 100644 index 0000000..08ed724 --- /dev/null +++ b/SOURCES/kvm-xics-Don-t-deassert-outputs.patch @@ -0,0 +1,52 @@ +From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:12 +0000 +Subject: [PATCH 02/15] xics: Don't deassert outputs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-3-dgibson@redhat.com> +Patchwork-id: 93430 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The correct way to do this is to deassert the input pins on the CPU side. +This is the case since a previous change. + +Signed-off-by: Greg Kurz +Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/xics.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/intc/xics.c b/hw/intc/xics.c +index e7ac9ba..72c5dca 100644 +--- a/hw/intc/xics.c ++++ b/hw/intc/xics.c +@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) + icp->pending_priority = 0xff; + icp->mfrr = 0xff; + +- /* Make all outputs are deasserted */ +- qemu_set_irq(icp->output, 0); +- + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm.conf b/SOURCES/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/SOURCES/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/SOURCES/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). 
+FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/SOURCES/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/SOURCES/qemu-pr-helper.service b/SOURCES/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/SOURCES/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/SOURCES/qemu-pr-helper.socket b/SOURCES/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/SOURCES/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/udev-kvm-check.c b/SOURCES/udev-kvm-check.c new file mode 100644 index 0000000..cb0ecba --- /dev/null +++ b/SOURCES/udev-kvm-check.c @@ -0,0 +1,172 @@ +/* + * udev-kvm-check.c + * + * Copyright 2018 Red Hat, Inc. 
+ * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include +#include +#include + +#define DEFAULT 0 +#define FACILITY "kvm" +#define SYSCONFIG_KVM "/etc/sysconfig/kvm" + +#define COUNT_MSG \ + "%d %s now active" + +#define SUBSCRIPTION_MSG \ + "%d %s now active; your Red Hat Enterprise Linux subscription" \ + " limit is %d guests. Please review your Red Hat Enterprise Linux" \ + " subscription agreement or contact your Red Hat" \ + " support representative for more information. You" \ + " may review the Red Hat Enterprise subscription" \ + " limits at http://www.redhat.com/rhel-virt-limits" + +int get_threshold_from_file(FILE *fp) +{ + static const char key[] = "THRESHOLD="; + int pos = 0; + int thres; + int ch; + +start: + /* State START - at beginning of line, search for beginning of "THRESHOLD=" + * string. 
+ */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (isspace(ch)) { + goto start; + } + if (ch == 'T') { + pos = 1; + goto key; + } + goto eol; + +eol: + /* State EOL - loop until end of line */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == '\n') { + goto start; + } + goto eol; + +key: + /* State KEY - match "THRESHOLD=" string, go to THRESHOLD if found */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == key[pos]) { + pos++; + if (key[pos] == 0) { + goto threshold; + } else { + goto key; + } + } + goto eol; + +threshold: + /* State THRESHOLD - parse number using fscanf, expect comment or space + * or EOL. + */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (!isdigit(ch)) { + goto eol; + } + ungetc(ch, fp); + if (fscanf(fp, "%d", &thres) != 1) { + return DEFAULT; + } + ch = getc(fp); + if (ch == '#' || ch == EOF || ch == '\n' || isspace(ch)) { + return thres; + } + goto eol; +} + +int get_threshold() +{ + FILE *fp = fopen(SYSCONFIG_KVM, "r"); + int val; + + if (!fp) { + return DEFAULT; + } + + val = get_threshold_from_file(fp); + fclose (fp); + return val; +} + +const char *guest(int count) +{ + return (count == 1 ? 
"guest" : "guests"); +} + +void emit_count_message(int count) +{ + openlog(FACILITY, LOG_CONS, LOG_USER); + syslog(LOG_INFO, COUNT_MSG, count, guest(count)); + closelog(); +} + +void emit_subscription_message(int count, int threshold) +{ + openlog(FACILITY, LOG_CONS, LOG_USER); + syslog(LOG_WARNING, SUBSCRIPTION_MSG, count, guest(count), threshold); + closelog(); +} + +int main(int argc, char **argv) +{ + int count, threshold; + + if (argc < 3) + exit(1); + + count = atoi(argv[1]); + threshold = get_threshold(); + + if (!strcmp(argv[2], "create")) { + if (threshold == 0) { + emit_count_message(count); + } else if (count > threshold) { + emit_subscription_message(count, threshold); + } + } else { + if (count >= threshold) { + emit_count_message(count); + } + } + + return 0; +} diff --git a/SOURCES/vhost.conf b/SOURCES/vhost.conf new file mode 100644 index 0000000..68d6d7f --- /dev/null +++ b/SOURCES/vhost.conf @@ -0,0 +1,3 @@ +# Increase default vhost memory map limit to match +# KVM's memory slot limit +options vhost max_mem_regions=509 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec new file mode 100644 index 0000000..94de815 --- /dev/null +++ b/SPECS/qemu-kvm.spec @@ -0,0 +1,3652 @@ +%global SLOF_gittagdate 20191022 +%global SLOF_gittagcommit 899d9883 + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 0 +%global have_gluster 1 +%global have_kvm_setup 0 +%global have_memlock_limits 0 + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_fdt 1 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x + %global have_kvm_setup 1 +%endif 
+%ifarch ppc + %global kvm_target ppc + %global have_fdt 1 +%endif +%ifarch aarch64 + %global kvm_target aarch64 + %global have_fdt 1 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma \ +Obsoletes: %1-rhev + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 4.2.0 +Release: 34%{?dist}.3 +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c +Source36: README.tests + + +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 
0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Add-support-for-simpletrace.patch +Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch +Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm +Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) +Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC +Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF +Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling +Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch +# For bz#1790844 - USB related fixes +Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +# For bz#1790844 - USB related fixes +Patch28: kvm-xhci-recheck-slot-status.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch29: kvm-tcp_emu-Fix-oob-access.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while 
emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch +# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train +Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +# For bz#1725084 - aarch64: support dumping SVE registers +Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch36: kvm-vhost-coding-style-fix.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared 
file system (qemu) +Patch44: kvm-virtiofsd-Add-passthrough_ll.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch45: kvm-virtiofsd-Trim-down-imported-files.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch55: kvm-virtiofsd-Add-options-for-virtio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch +# For bz#1694164 - virtio-fs: host<->guest 
shared file system (qemu) +Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch60: kvm-virtiofsd-get-set-features-callbacks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch61: kvm-virtiofsd-Start-queue-threads.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch64: kvm-virtiofsd-Send-replies-to-messages.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch65: kvm-virtiofsd-Keep-track-of-replies.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch67: kvm-virtiofsd-Fast-path-for-virtio-read.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch71: kvm-virtiofsd-add-print-capabilities-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch72: kvm-virtiofs-Add-maintainers-entry.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch74: 
kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch79: kvm-virtiofsd-validate-path-components.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch +# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) +Patch89: kvm-virtiofsd-move-to-an-empty-network-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch93: kvm-virtiofsd-cap-ng-helpers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch97: kvm-virtiofsd-add-syslog-command-line-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch101: kvm-virtiofsd-Handle-reinit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch102: kvm-virtiofsd-Handle-hard-reboot.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +# For bz#1694164 - virtio-fs: host<->guest shared 
file system (qemu) +Patch104: kvm-vhost-user-Print-unexpected-slave-message-types.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch118: 
kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch119: kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch126: kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch127: kvm-virtiofsd-make-lo_release-atomic.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) +Patch133: kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch140: kvm-virtiofsd-process-requests-in-a-thread-pool.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system 
(qemu) +Patch147: kvm-virtiofsd-add-some-options-to-the-help-message.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch149: kvm-xics-Don-t-deassert-outputs.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str +Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch +# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] +Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +# For bz#1787444 - Broken postcopy migration with vTPM device +Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch161: 
kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch164: kvm-iotests-Support-job-complete-in-run_job.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch165: kvm-iotests-Create-VM.blockdev_create.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with 
dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch172: 
kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch174: kvm-backup-top-Begin-drain-earlier.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with 
one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest 
installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus +Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch182: kvm-util-add-slirp_fmt-helpers.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug 
virtio-net-pci device +Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON +Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch190: kvm-target-i386-add-a-ucode-rev-property.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading +Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +# For bz#1794692 - Mirror block job stops making progress +Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +# For bz#1794692 - Mirror block job stops making progress +Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu +Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute 
migrate_cancel) +Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch204: kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch206: kvm-migration-Create-migration_is_running.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +# For bz#1797064 - virtiofsd: Fixes 
+Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch +# For bz#1797064 - virtiofsd: Fixes +Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +# For bz#1797064 - virtiofsd: Fixes +Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch +# For bz#1797064 - virtiofsd: Fixes +Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch +# For bz#1797064 - virtiofsd: Fixes +Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +# For bz#1797064 - virtiofsd: Fixes +Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +# For bz#1797064 - virtiofsd: Fixes +Patch215: kvm-virtiofsd-Fix-xattr-operations.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch217: kvm-block-Generic-file-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch218: kvm-file-posix-Drop-hdev_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch223: 
kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror 
+Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch233: kvm-iotests-Add-iothread-cases-to-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. +Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning +Patch236: kvm-migration-Rate-limit-inside-host-pages.patch +# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman +Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch +# For bz#1794692 - Mirror block job stops making progress +Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1794692 - Mirror block job stops making progress +Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +# For bz#1817621 - Crash and deadlock 
with block jobs when using io-threads +Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure +Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +# For bz#1790899 - [RFE] QEMU devices should have the option to enable/disable hotplug/unplug +Patch249: kvm-pcie_root_port-Add-hotplug-disabling-option.patch +# For bz#1816793 - 'edid' compat handling missing for virtio-gpu-ccw +Patch250: kvm-compat-disable-edid-for-virtio-gpu-ccw.patch +# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller +Patch251: kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch +# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller +Patch252: kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch253: kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch254: kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch255: kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch256: kvm-virtiofsd-jail-lo-proc_self_fd.patch +# 
For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch257: kvm-virtiofsd-Show-submounts.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch258: kvm-virtiofsd-only-retain-file-system-capabilities.patch +# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] +Patch259: kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch +# For bz#1775462 - Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image +Patch260: kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch261: kvm-numa-remove-not-needed-check.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch262: kvm-numa-properly-check-if-numa-is-supported.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch263: kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch264: kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch265: kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch266: kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch267: kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch268: kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch +# For 
bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch269: kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch270: kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch +# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train +Patch271: kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch +# For bz#1813940 - CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8] +Patch272: kvm-target-arm-Fix-PAuth-sbox-functions.patch +# For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] +Patch273: kvm-Don-t-leak-memory-when-reallocation-fails.patch +# For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] +Patch274: kvm-Replace-remaining-malloc-free-user-with-glib.patch +# For bz#1839030 - RFE: enable the "memfd" memory backend +Patch275: kvm-Revert-RHEL-disable-hostmem-memfd.patch +# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) +Patch276: kvm-block-introducing-bdrv_co_delete_file-interface.patch +# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) +Patch277: kvm-block.c-adding-bdrv_co_delete_file.patch +# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) +Patch278: kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch +# For bz#1513681 - [Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train +Patch279: kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch +# For bz#1841038 - qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7 +Patch280: 
kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch +# For bz#1841038 - qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7 +Patch281: kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch282: kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch283: kvm-iotests-Let-_make_test_img-parse-its-parameters.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch284: kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch285: kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch286: kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch287: kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch288: kvm-qemu-img-Add-bitmap-sub-command.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch289: kvm-iotests-Fix-test-178.patch +# For bz#1779893 - RFE: Copy bitmaps with 
qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch290: kvm-qcow2-Expose-bitmaps-size-during-measure.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch291: kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch292: kvm-qemu-img-Add-convert-bitmaps-option.patch +# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert +# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 +Patch293: kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch294: kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch295: kvm-iotests-don-t-use-format-for-drive_add.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch296: kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch297: kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch298: kvm-backup-Improve-error-for-bdrv_getlength-failure.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch299: kvm-backup-Make-sure-that-source-and-target-size-match.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch300: kvm-iotests-Backup-with-different-source-target-size.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch301: kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch +# For bz#1778593 - Qemu coredump when backup to a 
existing small size image +Patch302: kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch303: kvm-mirror-Make-sure-that-source-and-target-size-match.patch +# For bz#1778593 - Qemu coredump when backup to a existing small size image +Patch304: kvm-iotests-Mirror-with-different-source-target-size.patch +# For bz#1841068 - RFE: please support the "ramfb" display device model +Patch305: kvm-enable-ramfb.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch306: kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch307: kvm-block-Add-flags-to-bdrv-_co-_truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch308: kvm-block-backend-Add-flags-to-blk_truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch309: kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch310: kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch311: kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch312: kvm-block-truncate-Don-t-make-backing-file-data-visible.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch313: kvm-iotests-Add-qemu_io_log.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch314: kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch315: 
kvm-iotests-Test-committing-to-short-backing-file.patch +# For bz#1780574 - Data corruption with resizing short overlay over longer backing files +Patch316: kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch +# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train +Patch317: kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch +# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train +Patch318: kvm-i386-Add-macro-for-stibp.patch +# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train +Patch319: kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch +# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train +Patch320: kvm-i386-Add-new-CPU-model-Cooperlake.patch +# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train +Patch321: kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch +# For bz#1845384 - CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8] +Patch322: kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch +# For bz#1845384 - CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8] +Patch323: kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch +# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller +Patch324: kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch +# For bz#1840342 - [Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train +Patch325: kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch327: kvm-linux-headers-update-kvm.h.patch +# For bz#1828317 - [IBM 8.3 FEAT] 
s390x: Base KVM setup for secure guests - qemu part +Patch328: kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch329: kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch330: kvm-s390x-Move-initial-reset.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch331: kvm-s390x-Move-clear-reset.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch332: kvm-s390x-Beautify-diag308-handling.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch333: kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch334: kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch335: kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch336: kvm-pc-bios-s390x-Fix-reset-psw-mask.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch337: kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch338: kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch339: kvm-s390x-Add-missing-vcpu-reset-functions.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch340: kvm-s390-sclp-improve-special-wait-psw-logic.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch341: 
kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch342: kvm-s390-ipl-sync-back-loadparm.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch343: kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch344: kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch345: kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch346: kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch347: kvm-s390x-protvirt-Support-unpack-facility.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch348: kvm-s390x-protvirt-Add-migration-blocker.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch349: kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch350: kvm-s390x-protvirt-KVM-intercept-changes.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch351: kvm-s390x-Add-SIDA-memory-ops.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch352: kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch353: kvm-s390x-protvirt-SCLP-interpretation.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch354: kvm-s390x-protvirt-Set-guest-IPL-PSW.patch +# For 
bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch355: kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch356: kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch357: kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch358: kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch359: kvm-s390x-Add-unpack-facility-feature-to-GA1.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch360: kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch361: kvm-s390x-pv-Retry-ioctls-on-EINTR.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch362: kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch +# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part +Patch363: kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch +# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types +Patch364: kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch +# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types +Patch365: kvm-introduce-kvm_kernel_irqchip_-functions.patch +# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types +Patch366: kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch +# For bz#1823275 - RHEL8.1 - GPU Numa nodes not visible in guest post the pass-through. 
+Patch367: kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch368: kvm-vfio-ccw-Fix-error-message.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch369: kvm-vfio-ccw-allow-non-prefetch-ORBs.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch370: kvm-linux-headers-support-vfio-ccw-features.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch371: kvm-vfio-ccw-Refactor-cleanup-of-regions.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch372: kvm-vfio-ccw-Add-support-for-the-schib-region.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch373: kvm-vfio-ccw-Refactor-ccw-irq-handler.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch374: kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch375: kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch +# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part +Patch376: kvm-config-enable-VFIO_CCW.patch +Patch377: kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch +Patch378: kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch +# For bz#1838070 - CVE-2020-1983 virt:rhel/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-8] +Patch379: kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch +# For bz#1835390 - qemu promote host does not support 'EDX.npt' and 'EDX.nrip-save' when test with Q35 machine type on EPYC host +Patch380: kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch +# For bz#1854092 - kvm-unit-tests: tcg smp FAIL +Patch381: kvm-s390x-sigp-Fix-sense-running-reporting.patch +# For 
bz#1854092 - kvm-unit-tests: tcg smp FAIL +Patch382: kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch +Patch383: kvm-virtio-net-fix-removal-of-failover-device.patch +# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters +Patch384: kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch +# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters +Patch385: kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch +# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters +Patch386: kvm-iotests-026-Test-EIO-on-allocation-in-a-data-file.patch +# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters +Patch387: kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch +# For bz#1780385 - [RFE] AMD EPYC-Rome support for KVM / QEMU guest +Patch388: kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch +# For bz#1689341 - QEMU should report an error and return failure if AMD SEV is not enabled in the kernel +Patch389: kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch +# For bz#1689341 - QEMU should report an error and return failure if AMD SEV is not enabled in the kernel +Patch390: kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch +# For bz#1863034 - RHEL8.3 Beta - Secure Execution: Unable to start Qemu with "-no-reboot" option (qemu-kvm) +Patch391: kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch +# For bz#1869710 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-8.3.0] +Patch392: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch +# For bz#1890885 - qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available [rhel-8.3.0.z] +Patch393: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch +# For bz#1900578 - qemu-ga aborts after guest-shutdown command [rhel-8.3.0.z] +Patch394: kvm-qga-fix-assert-regression-on-guest-shutdown.patch +# For bz#1913818 - Report 
logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch395: kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch396: kvm-error-Fix-examples-in-error.h-s-big-comment.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch397: kvm-error-Improve-error.h-s-big-comment.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch398: kvm-error-Document-Error-API-usage-rules.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch399: kvm-error-New-macro-ERRP_GUARD.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch400: kvm-qga-add-command-guest-get-disks.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch401: kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch402: kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch403: kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch +# For bz#1913818 - Report logical_name for disks without mounted file-system [rhel-8.3.0.z] +Patch404: kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch + +BuildRequires: wget +BuildRequires: rpm-build +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.22 +%if 
%{have_usbredir}
+BuildRequires: usbredir-devel >= 0.7.1
+%endif
+%if %{have_spice}
+BuildRequires: spice-protocol >= 0.12.12
+BuildRequires: spice-server-devel >= 0.12.8
+BuildRequires: libcacard-devel
+# For smartcard NSS support
+BuildRequires: nss-devel
+%endif
+BuildRequires: libseccomp-devel >= 2.4.0
+# For network block driver
+BuildRequires: libcurl-devel
+BuildRequires: libssh-devel
+BuildRequires: librados-devel
+BuildRequires: librbd-devel
+%if %{have_gluster}
+# For gluster block driver
+BuildRequires: glusterfs-api-devel >= 3.6.0
+BuildRequires: glusterfs-devel
+%endif
+# We need both because the 'stap' binary is probed for by configure
+BuildRequires: systemtap
+BuildRequires: systemtap-sdt-devel
+# For VNC PNG support
+BuildRequires: libpng-devel
+# For uuid generation
+BuildRequires: libuuid-devel
+# For BlueZ device support
+BuildRequires: bluez-libs-devel
+# For Braille device support
+BuildRequires: brlapi-devel
+# For test suite
+BuildRequires: check-devel
+# For virtiofs
+BuildRequires: libcap-ng-devel
+# Hard requirement for version >= 1.3
+BuildRequires: pixman-devel
+# Documentation requirement
+BuildRequires: perl-podlators
+BuildRequires: texinfo
+BuildRequires: python3-sphinx
+# For rdma
+%if 0%{?have_librdma}
+BuildRequires: rdma-core-devel
+%endif
+%if %{have_fdt}
+BuildRequires: libfdt-devel >= 1.6.0
+%endif
+# iasl and cpp for acpi generation (not a hard requirement as we can use
+# pre-compiled files, but it's better to use this)
+%ifarch %{ix86} x86_64
+BuildRequires: iasl
+BuildRequires: cpp
+%endif
+# For compressed guest memory dumps
+BuildRequires: lzo-devel snappy-devel
+# For NUMA memory binding
+%ifnarch s390x
+BuildRequires: numactl-devel
+%endif
+BuildRequires: libgcrypt-devel
+# qemu-pr-helper multipath support (requires libudev too)
+BuildRequires: device-mapper-multipath-devel
+BuildRequires: systemd-devel
+# used by qemu-bridge-helper and qemu-pr-helper
+BuildRequires: libcap-ng-devel
+
+BuildRequires: diffutils
+%ifarch x86_64
+BuildRequires: libpmem-devel
+Requires: libpmem
+%endif
+
+# qemu-keymap
+BuildRequires: pkgconfig(xkbcommon)
+
+# For s390-pgste flag
+%ifarch s390x
+BuildRequires: binutils >= 2.27-16
+%endif
+
+%if %{have_opengl}
+BuildRequires: pkgconfig(epoxy)
+BuildRequires: pkgconfig(libdrm)
+BuildRequires: pkgconfig(gbm)
+Requires: mesa-libGL
+Requires: mesa-libEGL
+Requires: mesa-dri-drivers
+%endif
+
+BuildRequires: perl-Test-Harness
+
+Requires: qemu-kvm-core = %{epoch}:%{version}-%{release}
+%rhev_ma_conflicts qemu-kvm
+
+%{requires_all_modules}
+
+%define qemudocdir %{_docdir}/%{name}
+
+%description
+qemu-kvm is an open source virtualizer that provides hardware
+emulation for the KVM hypervisor. qemu-kvm acts as a virtual
+machine monitor together with the KVM kernel modules, and emulates the
+hardware for a full system such as a PC and its associated peripherals.
+
+
+%package -n qemu-kvm-core
+Summary: qemu-kvm core components
+# Runtime dependencies of the core package: the matching qemu-img and -common
+# builds, per-architecture firmware/ROM packages, and the libraries whose
+# optional features are switched on by the have_* conditionals.
+Requires: qemu-img = %{epoch}:%{version}-%{release}
+%ifarch %{ix86} x86_64
+Requires: seabios-bin >= 1.10.2-1
+Requires: sgabios-bin
+Requires: edk2-ovmf
+%endif
+%ifarch aarch64
+Requires: edk2-aarch64
+%endif
+
+%ifnarch aarch64 s390x
+Requires: seavgabios-bin >= 1.12.0-3
+Requires: ipxe-roms-qemu >= 20170123-1
+%endif
+%ifarch %{power64}
+Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit}
+%endif
+Requires: %{name}-common = %{epoch}:%{version}-%{release}
+Requires: libseccomp >= 2.4.0
+# For compressed guest memory dumps
+Requires: lzo snappy
+%if %{have_gluster}
+Requires: glusterfs-api >= 3.6.0
+%endif
+%if %{have_kvm_setup}
+Requires(post): systemd-units
+Requires(preun): systemd-units
+ %ifarch %{power64}
+Requires: powerpc-utils
+ %endif
+%endif
+# Must not be lower than the libusbx-devel BuildRequires (>= 1.0.22): the
+# binaries are built against that release and may need symbols older ones lack.
+Requires: libusbx >= 1.0.22
+%if %{have_usbredir}
+Requires: usbredir >= 0.7.1
+%endif
+%if %{have_fdt}
+Requires: libfdt >= 1.6.0
+%endif
+
+# NOTE(review): qemu-img and qemu-kvm-common pass their own name to
+# rhev_ma_conflicts; this subpackage passes the main package name instead.
+# Presumably deliberate - confirm against the macro definition.
+%rhev_ma_conflicts qemu-kvm
+
+%description -n qemu-kvm-core
+qemu-kvm is an open source virtualizer that provides hardware
+emulation for the KVM hypervisor. qemu-kvm acts as a virtual
+machine monitor together with the KVM kernel modules, and emulates the
+hardware for a full system such as a PC and its associated peripherals.
+
+
+%package -n qemu-img
+# Disk-image tool shipped as its own package; qemu-kvm-core requires it at
+# the identical epoch:version-release.
+Summary: QEMU command line tool for manipulating disk images
+Group: Development/Tools
+
+%rhev_ma_conflicts qemu-img
+
+%description -n qemu-img
+This package provides a command line tool for manipulating disk images.
+
+%package -n qemu-kvm-common
+# The getent/groupadd/useradd and systemd-units entries below are
+# scriptlet-time dependencies. Presumably the post/preun/postun scriptlets
+# (not visible in this section) manage accounts and units - confirm there.
+Summary: QEMU common files needed by all QEMU targets
+Group: Development/Tools
+Requires(post): /usr/bin/getent
+Requires(post): /usr/sbin/groupadd
+Requires(post): /usr/sbin/useradd
+Requires(post): systemd-units
+Requires(preun): systemd-units
+Requires(postun): systemd-units
+
+%rhev_ma_conflicts qemu-kvm-common
+
+%description -n qemu-kvm-common
+qemu-kvm is an open source virtualizer that provides hardware emulation for
+the KVM hypervisor.
+
+This package provides documentation and auxiliary programs used with qemu-kvm.
+
+
+%package -n qemu-guest-agent
+# Declares no dependency on any other qemu-kvm subpackage, only the
+# scriptlet-time systemd-units requirements.
+Summary: QEMU guest agent
+Requires(post): systemd-units
+Requires(preun): systemd-units
+Requires(postun): systemd-units
+
+%description -n qemu-guest-agent
+qemu-kvm is an open source virtualizer that provides hardware emulation for
+the KVM hypervisor.
+
+This package provides an agent to run inside guests, which communicates
+with the host over a virtio-serial channel named "org.qemu.guest_agent.0"
+
+This package does not need to be installed on the host OS.
+
+%package tests
+# Requires the main package at the identical epoch:version-release.
+Summary: tests for the qemu-kvm package
+Requires: %{name} = %{epoch}:%{version}-%{release}
+
+# Directory under libdir for the shipped test sources; the users of this
+# macro are outside this section.
+%define testsdir %{_libdir}/%{name}/tests-src
+
+%description tests
+The qemu-kvm-tests rpm contains tests that can be used to verify
+the functionality of the installed qemu-kvm package
+
+Install this package if you want access to the avocado_qemu
+tests, or qemu-iotests.
+
+%package block-curl
+# Every block-* driver subpackage in this group requires the -common
+# subpackage for the same architecture at the identical epoch:version-release.
+Summary: QEMU CURL block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-curl
+This package provides the additional CURL block driver for QEMU.
+
+Install this package if you want to access remote disks over
+http, https, ftp and other transports provided by the CURL library.
+
+
+%if %{have_gluster}
+%package block-gluster
+# Only declared when have_gluster is enabled.
+Summary: QEMU Gluster block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+%description block-gluster
+This package provides the additional Gluster block driver for QEMU.
+
+Install this package if you want to access remote Gluster storage.
+%endif
+
+
+%package block-iscsi
+Summary: QEMU iSCSI block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-iscsi
+This package provides the additional iSCSI block driver for QEMU.
+
+Install this package if you want to access iSCSI volumes.
+
+
+%package block-rbd
+Summary: QEMU Ceph/RBD block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-rbd
+This package provides the additional Ceph/RBD block driver for QEMU.
+
+Install this package if you want to access remote Ceph volumes
+using the rbd protocol.
+
+
+%package block-ssh
+Summary: QEMU SSH block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-ssh
+This package provides the additional SSH block driver for QEMU.
+
+Install this package if you want to access remote disks using
+the Secure Shell (SSH) protocol.
+
+
+%prep
+# Unpack the source tree into qemu-<version>, then apply the PatchN entries
+# declared in the preamble with strip level 1.
+%setup -n qemu-%{version}
+%autopatch -p1
+
+%build
+# Only one system-emulation target is built and passed to configure as the
+# target list; kvm_target is presumably set per architecture earlier in the
+# spec (not visible here).
+%global buildarch %{kvm_target}-softmmu
+
+# --build-id option is used for giving info to the debug packages.
+buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + +./configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{qemudocdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-confsuffix=/"%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ +%if 0%{have_fdt} + --enable-fdt \ +%else + --disable-fdt \ + %endif +%if 0%{have_gluster} + --enable-glusterfs \ +%else + --disable-glusterfs \ +%endif + --enable-guest-agent \ +%ifnarch s390x + --enable-numa \ +%else + --disable-numa \ +%endif + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%else + --disable-rdma \ +%endif + --disable-pvrdma \ + --enable-seccomp \ +%if 0%{have_spice} + --enable-spice \ + --enable-smartcard \ +%else + --disable-spice \ + --disable-smartcard \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%else + --disable-opengl \ +%endif +%if 0%{have_usbredir} + --enable-usb-redir \ +%else + --disable-usb-redir \ +%endif + --disable-tcmalloc \ +%ifarch x86_64 + --enable-libpmem \ +%else + --disable-libpmem \ +%endif + --enable-vhost-user \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%else + --disable-avx2 \ +%endif + --python=%{__python3} \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --tls-priority=NORMAL \ + --disable-bluez \ + --disable-brlapi \ + --enable-cap-ng \ + --enable-coroutine-pool \ + --enable-curl \ + --disable-curses \ + --disable-debug-tcg \ + --enable-docs \ 
+ --disable-gtk \ + --enable-kvm \ + --enable-libiscsi \ + --disable-libnfs \ + --enable-libssh \ + --enable-libusb \ + --disable-bzip2 \ + --enable-linux-aio \ + --disable-live-block-migration \ + --enable-lzo \ + --enable-pie \ + --disable-qom-cast-debug \ + --disable-sdl \ + --enable-snappy \ + --disable-sparse \ + --disable-strip \ + --enable-tpm \ + --enable-trace-backend=dtrace \ + --disable-vde \ + --disable-vhost-scsi \ + --disable-vxhs \ + --disable-virtfs \ + --disable-vnc-jpeg \ + --disable-vte \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --disable-xen \ + --disable-xfsctl \ + --enable-gnutls \ + --enable-gcrypt \ + --disable-nettle \ + --enable-attr \ + --disable-bsd-user \ + --disable-cocoa \ + --enable-debug-info \ + --disable-guest-agent-msi \ + --disable-hax \ + --disable-jemalloc \ + --disable-linux-user \ + --enable-modules \ + --disable-netmap \ + --disable-replication \ + --enable-system \ + --enable-tools \ + --disable-user \ + --enable-vhost-net \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-mpath \ + --disable-xen-pci-passthrough \ + --enable-tcg \ + --with-git=git \ + --disable-sanitizers \ + --disable-hvf \ + --disable-whpx \ + --enable-malloc-trim \ + --disable-membarrier \ + --disable-vhost-crypto \ + --disable-libxml2 \ + --enable-capstone \ + --disable-git-update \ + --disable-crypto-afalg \ + --disable-debug-mutex \ + --disable-bochs \ + --disable-cloop \ + --disable-dmg \ + --disable-qcow1 \ + --disable-vdi \ + --disable-vvfat \ + --disable-qed \ + --disable-parallels \ + --disable-sheepdog \ + --disable-auth-pam \ + --enable-iconv \ + --disable-lzfse \ + --enable-vhost-kernel \ + --disable-virglrenderer \ + --without-default-devices + +echo "config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kvm 
--probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-log.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check + +%ifarch s390x + # Copy the built new images into place for "make check": + cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ +%endif + +%install +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + +install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Create new directories and put them all under tests-src +mkdir -p 
$RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} + +# - the fsfreeze hook script: +install -D 
--preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} +install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 + +install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ + +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py 
+mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp + +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + +%ifarch s390x + # Use the s390-*.imgs that we've just built, not the pre-built ones + install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%endif + +%ifnarch x86_64 + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin +%endif + +# Remove sparc files +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin + +# Remove ivshmem example programs +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server + +# Remove efi roms +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom + +# Provided by package ipxe +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +# Provided by package vgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +# Provided by package seabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +# Provided by package sgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin + +# the pxe gpxe images will be symlinks to the images on +# /usr/share/ipxe, as QEMU doesn't know how to look +# for other paths, yet. 
+pxe_link() {
+    ln -s ../ipxe.efi/$2.rom %{buildroot}%{_datadir}/%{name}/efi-$1.rom
+}
+
+%ifnarch aarch64 s390x
+pxe_link e1000 8086100e
+pxe_link ne2k_pci 10ec8029
+pxe_link pcnet 10222000
+pxe_link rtl8139 10ec8139
+pxe_link virtio 1af41000
+pxe_link e1000e 808610d3
+%endif
+
+rom_link() {
+    ln -s $1 %{buildroot}%{_datadir}/%{name}/$2
+}
+
+%ifnarch aarch64 s390x
+  rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin
+  rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin
+  rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin
+  rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin
+  rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin
+  rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin
+  rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin
+  rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin
+%endif
+%ifarch x86_64
+  rom_link ../seabios/bios.bin bios.bin
+  rom_link ../seabios/bios-256k.bin bios-256k.bin
+  rom_link ../sgabios/sgabios.bin sgabios.bin
+%endif
+
+%if 0%{have_kvm_setup}
+  install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup
+  install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service
+  install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset
+%endif
+
+%if 0%{have_memlock_limits}
+  install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf
+%endif
+
+# Install rules to use the bridge helper with libvirt's virbr0
+install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf
+
+# Install qemu-pr-helper service
+install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir}
+install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir}
+
+find $RPM_BUILD_ROOT \( -name '*.la' -o -name '*.a' \) -print0 | xargs -0 rm -f  # NUL-delimited: safe for paths containing whitespace
+
+# We need to make the block device modules executable else
+# RPM won't pick up their dependencies.
+chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so + +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo + +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + +%check +export DIFF=diff; make check V=1 + +%post -n qemu-kvm-core +# load kvm modules now, so we can make sure no reboot is needed. +# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +%if %{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + systemctl start kvm-setup.service + fi +%endif + +%if %{have_kvm_setup} +%preun -n qemu-kvm-core +%systemd_preun kvm-setup.service +%endif + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +%preun -n qemu-kvm-common +%systemd_preun ksm.service +%systemd_preun ksmtuned.service + +%postun -n qemu-kvm-common +%systemd_postun_with_restart ksm.service +%systemd_postun_with_restart ksmtuned.service + +%files +# Deliberately empty + + +%files -n qemu-kvm-common +%defattr(-,root,root) +%dir %{qemudocdir} +%doc %{qemudocdir}/Changelog +%doc %{qemudocdir}/README.rst +%doc %{qemudocdir}/qemu-doc.html +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%doc %{qemudocdir}/README.systemtap +%doc %{qemudocdir}/qmp-spec.txt +%doc %{qemudocdir}/qemu-doc.txt +%doc %{qemudocdir}/qemu-ga-ref.html +%doc %{qemudocdir}/qemu-ga-ref.txt +%doc %{qemudocdir}/qemu-qmp-ref.html +%doc %{qemudocdir}/qemu-qmp-ref.txt +%doc %{qemudocdir}/interop/* 
+%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%{_mandir}/man7/qemu-ga-ref.7* + +%dir %{_datadir}/%{name}/ +%{_datadir}/%{name}/keymaps/ +%{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* +%{_mandir}/man7/qemu-block-drivers.7* +%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf +%{_unitdir}/ksm.service +%{_libexecdir}/ksmctl +%config(noreplace) %{_sysconfdir}/sysconfig/ksm +%{_unitdir}/ksmtuned.service +%{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm +%config(noreplace) %{_sysconfdir}/ksmtuned.conf +%dir %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* + +%files -n qemu-kvm-core +%defattr(-,root,root) +%ifarch x86_64 + %{_datadir}/%{name}/bios.bin + %{_datadir}/%{name}/bios-256k.bin + %{_datadir}/%{name}/linuxboot.bin + %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/kvmvapic.bin + %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin +%endif +%ifarch s390x + %{_datadir}/%{name}/s390-ccw.img + %{_datadir}/%{name}/s390-netboot.img +%endif +%ifnarch aarch64 s390x + %{_datadir}/%{name}/vgabios.bin + %{_datadir}/%{name}/vgabios-cirrus.bin + %{_datadir}/%{name}/vgabios-qxl.bin + %{_datadir}/%{name}/vgabios-stdvga.bin + %{_datadir}/%{name}/vgabios-vmware.bin + %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin + %{_datadir}/%{name}/efi-e1000.rom + 
%{_datadir}/%{name}/efi-e1000e.rom + %{_datadir}/%{name}/efi-virtio.rom + %{_datadir}/%{name}/efi-pcnet.rom + %{_datadir}/%{name}/efi-rtl8139.rom + %{_datadir}/%{name}/efi-ne2k_pci.rom +%endif +%{_datadir}/icons/* +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/%{name}/trace-events-all +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif +%{_libexecdir}/virtiofsd +%{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + +%files tests +%{testsdir} + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +%if %{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + + +%changelog +* Fri Feb 05 2021 Danilo Cesar Lemes de Paula - 4.2.0-34.el8_3.3 +- kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch [bz#1913818] +- kvm-error-Fix-examples-in-error.h-s-big-comment.patch [bz#1913818] +- 
kvm-error-Improve-error.h-s-big-comment.patch [bz#1913818] +- kvm-error-Document-Error-API-usage-rules.patch [bz#1913818] +- kvm-error-New-macro-ERRP_GUARD.patch [bz#1913818] +- kvm-qga-add-command-guest-get-disks.patch [bz#1913818] +- kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch [bz#1913818] +- kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch [bz#1913818] +- kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch [bz#1913818] +- kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch [bz#1913818] +- Resolves: bz#1913818 + (Report logical_name for disks without mounted file-system [rhel-8.3.0.z]) + +* Wed Dec 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-34.el8_3.2 +- kvm-qga-fix-assert-regression-on-guest-shutdown.patch [bz#1900578] +- Resolves: bz#1900578 + (qemu-ga aborts after guest-shutdown command [rhel-8.3.0.z]) + +* Mon Nov 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-34.el8_3.1 +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1890885] +- Resolves: bz#1890885 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available [rhel-8.3.0.z]) + +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-34.el8 +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869710] +- Resolves: bz#1869710 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-8.3.0]) + +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-33.el8 +- kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1867847 + ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) + +* Mon Aug 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-32.el8 +- kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch [bz#1780385] +- kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch [bz#1689341] +- kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch [bz#1689341] +- kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch [bz#1863034] +- 
Resolves: bz#1689341 + (QEMU should report an error and return failure if AMD SEV is not enabled in the kernel) +- Resolves: bz#1780385 + ([RFE] AMD EPYC-Rome support for KVM / QEMU guest) +- Resolves: bz#1863034 + (RHEL8.3 Beta - Secure Execution: Unable to start Qemu with "-no-reboot" option (qemu-kvm)) + +* Wed Jul 22 2020 Danilo Cesar Lemes de Paula - 4.2.0-31.el8 +- kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch [bz#1807057] +- kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch [bz#1807057] +- kvm-iotests-026-Test-EIO-on-allocation-in-a-data-file.patch [bz#1807057] +- kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch [bz#1807057] +- Resolves: bz#1807057 + (qcow2_alloc_cluster_abort() frees preallocated zero clusters) + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-30.el8 +- kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch [bz#1835390] +- kvm-s390x-sigp-Fix-sense-running-reporting.patch [bz#1854092] +- kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch [bz#1854092] +- kvm-virtio-net-fix-removal-of-failover-device.patch [] +- Resolves: bz#1835390 + (qemu promote host does not support 'EDX.npt' and 'EDX.nrip-save' when test with Q35 machine type on EPYC host) +- Resolves: bz#1854092 + (kvm-unit-tests: tcg smp FAIL) + +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-vfio-ccw-Fix-error-message.patch [bz#1660916] +- kvm-vfio-ccw-allow-non-prefetch-ORBs.patch [bz#1660916] +- kvm-linux-headers-support-vfio-ccw-features.patch [bz#1660916] +- kvm-vfio-ccw-Refactor-cleanup-of-regions.patch [bz#1660916] +- kvm-vfio-ccw-Add-support-for-the-schib-region.patch [bz#1660916] +- kvm-vfio-ccw-Refactor-ccw-irq-handler.patch [bz#1660916] +- kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch [bz#1660916] +- kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch [bz#1660916] +- kvm-config-enable-VFIO_CCW.patch [bz#1660916] +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [] +- 
kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838070] +- Resolves: bz#1660916 + ([IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part) +- Resolves: bz#1838070 + (CVE-2020-1983 virt:rhel/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-8]) + +* Fri Jun 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1828317] +- kvm-linux-headers-update-kvm.h.patch [bz#1828317] +- kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch [bz#1828317] +- kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch [bz#1828317] +- kvm-s390x-Move-initial-reset.patch [bz#1828317] +- kvm-s390x-Move-clear-reset.patch [bz#1828317] +- kvm-s390x-Beautify-diag308-handling.patch [bz#1828317] +- kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch [bz#1828317] +- kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch [bz#1828317] +- kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch [bz#1828317] +- kvm-pc-bios-s390x-Fix-reset-psw-mask.patch [bz#1828317] +- kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch [bz#1828317] +- kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch [bz#1828317] +- kvm-s390x-Add-missing-vcpu-reset-functions.patch [bz#1828317] +- kvm-s390-sclp-improve-special-wait-psw-logic.patch [bz#1828317] +- kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch [bz#1828317] +- kvm-s390-ipl-sync-back-loadparm.patch [bz#1828317] +- kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch [bz#1828317] +- kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch [bz#1828317] +- kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch [bz#1828317] +- kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch [bz#1828317] +- kvm-s390x-protvirt-Support-unpack-facility.patch [bz#1828317] +- kvm-s390x-protvirt-Add-migration-blocker.patch [bz#1828317] +- 
kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch [bz#1828317] +- kvm-s390x-protvirt-KVM-intercept-changes.patch [bz#1828317] +- kvm-s390x-Add-SIDA-memory-ops.patch [bz#1828317] +- kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch [bz#1828317] +- kvm-s390x-protvirt-SCLP-interpretation.patch [bz#1828317] +- kvm-s390x-protvirt-Set-guest-IPL-PSW.patch [bz#1828317] +- kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch [bz#1828317] +- kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch [bz#1828317] +- kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch [bz#1828317] +- kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch [bz#1828317] +- kvm-s390x-Add-unpack-facility-feature-to-GA1.patch [bz#1828317] +- kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch [bz#1828317] +- kvm-s390x-pv-Retry-ioctls-on-EINTR.patch [bz#1828317] +- kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch [bz#1828317] +- kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch [bz#1828317] +- kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch [bz#1756946] +- kvm-introduce-kvm_kernel_irqchip_-functions.patch [bz#1756946] +- kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch [bz#1756946] +- kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch [bz#1823275] +- Resolves: bz#1756946 + ([zKVM] Re-enable KVM_CAP_S390_AIS for new machine types) +- Resolves: bz#1823275 + (RHEL8.1 - GPU Numa nodes not visible in guest post the pass-through.) +- Resolves: bz#1828317 + ([IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part) + +* Fri Jun 19 2020 Danilo C. L. de Paula - 4.2.0 +- Resolves: bz#1810193 +(Upgrade components in virt:rhel module:stream for RHEL-8.3 release) + +* Tue Jun 09 2020 Danilo C. L. de Paula - 4.2.0-25 +- Resolves: bz#1810193 + (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) + Another sync + +* Thu Jun 04 2020 Danilo C. L. 
de Paula - 4.2.0-23.el8 +- Resolves: bz#1810193 + (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) + Another synchronization + +* Mon Apr 27 2020 Danilo C. L. de Paula - 4.2.0 +- Resolves: bz#1810193 + (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) + +* Fri Feb 21 2020 Danilo Cesar Lemes de Paula - 2.12.0-99.el8 +- kvm-slirp-disable-tcp_emu.patch [bz#1791677] +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1790308] +- Resolves: bz#1790308 + (qemu-kvm core dump when do L1 guest live migration with L2 guest running) +- Resolves: bz#1791677 + (QEMU: Slirp: disable emulation of tcp programs like ftp IRC etc. [rhel-8]) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 2.12.0-98.el8 +- kvm-iscsi-Avoid-potential-for-get_status-overflow.patch [bz#1794501] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794501] +- kvm-clean-up-callback-when-del-virtqueue.patch [bz#1708480] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1708480] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1708480] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1708480] +- Resolves: bz#1708480 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1794501 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-8.2.0]) + +* Fri Jan 24 2020 Miroslav Rezanina - 2.12.0-97.el8 +- kvm-exec-Fix-MAP_RAM-for-cached-access.patch [bz#1769613] +- kvm-virtio-Return-true-from-virtio_queue_empty-if-broken.patch [bz#1769613] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1752320] +- kvm-xhci-recheck-slot-status.patch [bz#1752320] +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791566] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791566] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791566] +- Resolves: bz#1752320 + (vm gets stuck
when migrate vm back and forth with remote-viewer trying to connect) +- Resolves: bz#1769613 + ([SEV] kexec mays hang at "[sda] Synchronizing SCSI cache " before switching to new kernel) +- Resolves: bz#1791566 + (CVE-2020-7039 virt:rhel/qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-8.2.0]) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 2.12.0-96.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741346] +- Resolves: bz#1741346 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) + +* Thu Jan 02 2020 Danilo Cesar Lemes de Paula - 2.12.0-95.el8 +- kvm-virtio-gpu-block-both-2d-and-3d-rendering.patch [bz#1674324] +- kvm-x86-Intel-AVX512_BF16-feature-enabling.patch [bz#1642541] +- Resolves: bz#1642541 + ([Intel 8.2 Feature] qemu-kvm Enable BFloat16 data type support) +- Resolves: bz#1674324 + (With , qemu either refuses to start completely or spice-server crashes afterwards) + +* Wed Dec 18 2019 Danilo Cesar Lemes de Paula - 2.12.0-94.el8 +- kvm-util-mmap-alloc-Add-a-is_pmem-parameter-to-qemu_ram_.patch [bz#1539282] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1539282] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1539282] +- kvm-util-mmap-alloc-support-MAP_SYNC-in-qemu_ram_mmap.patch [bz#1539282] +- kvm-x86-cpu-Enable-MOVDIRI-cpu-feature.patch [bz#1634827] +- kvm-x86-cpu-Enable-MOVDIR64B-cpu-feature.patch [bz#1634827] +- kvm-add-call-to-qemu_add_opts-for-overcommit-option.patch [bz#1634827] +- kvm-support-overcommit-cpu-pm-on-off.patch [bz#1634827] +- kvm-i386-cpu-make-cpu-host-support-monitor-mwait.patch [] +- kvm-x86-cpu-Add-support-for-UMONITOR-UMWAIT-TPAUSE.patch [bz#1634827] +- kvm-target-i386-Add-support-for-save-load-IA32_UMWAIT_CO.patch [bz#1634827] +- Resolves: bz#1539282 + ([Intel 8.2 Feature][Crystal Ridge] Support MAP_SYNC - qemu-kvm) +- Resolves: bz#1634827 + ([Intel 8.2 Feat] KVM Enable SnowRidge Accelerator Interface Architecture (AIA) - qemu) + +* Wed Dec 11 2019 Danilo Cesar 
Lemes de Paula - 2.12.0-93.el8 +- kvm-target-i386-Export-TAA_NO-bit-to-guests.patch [bz#1771971] +- kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch [bz#1771971] +- Resolves: bz#1771971 + (CVE-2019-11135 virt:rhel/qemu-kvm: hw: TSX Transaction Asynchronous Abort (TAA) [rhel-8.2.0]) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 2.12.0-92.el8 +- kvm-x86-cpu-use-FeatureWordArray-to-define-filtered_feat.patch [bz#1689270] +- kvm-i386-Add-x-force-features-option-for-testing.patch [bz#1689270] +- kvm-target-i386-define-a-new-MSR-based-feature-word-FEAT.patch [bz#1689270] +- kvm-i386-display-known-CPUID-features-linewrapped-in-alp.patch [bz#1689270] +- kvm-target-i386-kvm-kvm_get_supported_msrs-cleanup.patch [bz#1689270] +- kvm-target-i386-handle-filtered_features-in-a-new-functi.patch [bz#1689270] +- kvm-target-i386-introduce-generic-feature-dependency-mec.patch [bz#1689270] +- kvm-target-i386-expand-feature-words-to-64-bits.patch [bz#1689270] +- kvm-target-i386-add-VMX-definitions.patch [bz#1689270] +- kvm-vmxcap-correct-the-name-of-the-variables.patch [bz#1689270] +- kvm-target-i386-add-VMX-features.patch [bz#1689270] +- kvm-target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch [bz#1689270] +- kvm-target-i386-adjust-for-missing-VMX-features.patch [bz#1689270] +- kvm-target-i386-add-VMX-features-to-named-CPU-models.patch [bz#1689270] +- kvm-target-i386-add-VMX-features-to-named-CPU-models-RHE.patch [bz#1689270] +- kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch [bz#1776808] +- Resolves: bz#1689270 + (Nested KVM: limit VMX features according to CPU models - Slow Train) +- Resolves: bz#1776808 + (qemu-kvm crashes when Windows VM is migrated with multiqueue) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 2.12.0-91.el8 +- kvm-qapi-fill-in-CpuInfoFast.arch-in-query-cpus-fast.patch [bz#1730969] +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1744602] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1744602] +- 
kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1744602] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1744602] +- kvm-curl-Report-only-ready-sockets.patch [bz#1744602] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1744602] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1744602] +- Resolves: bz#1730969 + ([ppc] qmp: The 'arch' value returned by the command 'query-cpus-fast' does not match) +- Resolves: bz#1744602 + (qemu-img gets stuck when stream-converting from http) + +* Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 2.12.0-90.el8 +- kvm-i386-Don-t-print-warning-if-phys-bits-was-set-automa.patch [bz#1719127] +- kvm-Disable-CONFIG_I2C-and-CONFIG_IOH3420.patch [bz#1693140] +- kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch [bz#1757482] +- kvm-pc-bios-s390-ccw-define-loadparm-length.patch [bz#1664376] +- kvm-pc-bios-s390-ccw-net-Use-diag308-to-reset-machine-be.patch [bz#1664376] +- kvm-s390-bios-decouple-cio-setup-from-virtio.patch [bz#1664376] +- kvm-s390-bios-decouple-common-boot-logic-from-virtio.patch [bz#1664376] +- kvm-s390-bios-Clean-up-cio.h.patch [bz#1664376] +- kvm-s390-bios-Decouple-channel-i-o-logic-from-virtio.patch [bz#1664376] +- kvm-s390-bios-Map-low-core-memory.patch [bz#1664376] +- kvm-s390-bios-ptr2u32-and-u32toptr.patch [bz#1664376] +- kvm-s390-bios-Support-for-running-format-0-1-channel-pro.patch [bz#1664376] +- kvm-s390-bios-cio-error-handling.patch [bz#1664376] +- kvm-s390-bios-Extend-find_dev-for-non-virtio-devices.patch [bz#1664376] +- kvm-s390-bios-Factor-finding-boot-device-out-of-virtio-c.patch [bz#1664376] +- kvm-s390-bios-Refactor-virtio-to-run-channel-programs-vi.patch [bz#1664376] +- kvm-s390-bios-Use-control-unit-type-to-determine-boot-me.patch [bz#1664376] +- kvm-s390-bios-Add-channel-command-codes-structs-needed-f.patch [bz#1664376] +- kvm-s390-bios-Support-booting-from-real-dasd-device.patch [bz#1664376] +- kvm-s390-bios-Use-control-unit-type-to-find-bootable-dev.patch 
[bz#1664376] +- kvm-s390x-vfio-ap-Implement-hot-plug-unplug-of-vfio-ap-d.patch [bz#1660906] +- Resolves: bz#1660906 + ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Hotplug - qemu part) +- Resolves: bz#1664376 + ([IBM 8.2 FEAT] CCW IPL Support (kvm) - qemu part) +- Resolves: bz#1693140 + (aarch64: qemu: remove smbus_eeprom and i2c from config) +- Resolves: bz#1719127 + ([Intel 8.2 Bug] warning shown when boot VM with “–cpu host” or “–cpu other mode” on ICX platform (physical)) +- Resolves: bz#1757482 + (Fail to migrate a rhel6.10-mt7.6 guest with dimm device) + +* Mon Oct 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-89.el8 +- kvm-accel-use-g_strsplit-for-parsing-accelerator-names.patch [bz#1749022] +- kvm-opts-don-t-silently-truncate-long-parameter-keys.patch [bz#1749022] +- kvm-opts-don-t-silently-truncate-long-option-values.patch [bz#1749022] +- kvm-i386-fix-regression-parsing-multiboot-initrd-modules.patch [bz#1749022] +- kvm-i386-only-parse-the-initrd_filename-once-for-multibo.patch [bz#1749022] +- kvm-opts-remove-redundant-check-for-NULL-parameter.patch [bz#1749022] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749724] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1708459] +- kvm-s390x-cpumodel-Rework-CPU-feature-definition.patch [bz#1660909] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AQIC-interceptio.patch [bz#1660909] +- kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch [bz#1746361] +- kvm-s390-PCI-fix-IOMMU-region-init.patch [bz#1754643] +- kvm-fw_cfg-Improve-error-message-when-can-t-load-splash-.patch [bz#1607367] +- kvm-fw_cfg-Fix-boot-bootsplash-error-checking.patch [bz#1607367] +- kvm-fw_cfg-Fix-boot-reboot-timeout-error-checking.patch [bz#1607367] +- kvm-hw-nvram-fw_cfg-Store-reboot-timeout-as-little-endia.patch [bz#1607367] +- kvm-intel_iommu-Correct-caching-mode-error-message.patch [bz#1738440] +- kvm-intel_iommu-Sanity-check-vfio-pci-config-on-machine-.patch [bz#1738440] +- 
kvm-qdev-machine-Introduce-hotplug_allowed-hook.patch [bz#1738440] +- kvm-pc-q35-Disallow-vfio-pci-hotplug-without-VT-d-cachin.patch [bz#1738440] +- kvm-intel_iommu-Remove-the-caching-mode-check-during-fla.patch [bz#1738440] +- kvm-pseries-do-not-allow-memory-less-cpu-less-NUMA-node.patch [bz#1651474] +- Resolves: bz#1607367 + (After boot failed, guest should not reboot when set reboot-timeout < -1) +- Resolves: bz#1651474 + (RHEL8.0 Beta - [4.18.0-32.el8.ppc64le] Guest VM crashes during vcpu hotplug with specific numa configuration (kvm)) +- Resolves: bz#1660909 + ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Interrupt Support - qemu part) +- Resolves: bz#1708459 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1738440 + (For intel-iommu, qemu shows conflict behaviors between booting a guest with vfio and hot plugging vfio device) +- Resolves: bz#1746361 + (ccid: Fix incorrect dwProtocol advertisement of T=0) +- Resolves: bz#1749022 + (Please backport 950c4e6c94b1 ("opts: don't silently truncate long option values", 2018-05-09)) +- Resolves: bz#1749724 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) +- Resolves: bz#1754643 + (RHEL8.1 Snapshot3 - Passthrough PCI card goes into error state if used in domain (kvm)) + +* Fri Sep 13 2019 Danilo Cesar Lemes de Paula - 2.12.0-88.el8 +- Revert fix for bz#1749724 - this got delayed to 8.2 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) + +* Tue Sep 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-86.el8 +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742819] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1744415] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1744415] +- kvm-i386-x86_cpu_list_feature_names-function.patch [bz#1747185] +- kvm-i386-unavailable-features-QOM-property.patch [bz#1747185] +- kvm-file-posix-Handle-undetectable-alignment.patch 
[bz#1738839] +- kvm-iotests-Tweak-221-sizing-for-different-hole-granular.patch [bz#1738839] +- kvm-iotests-Filter-175-s-allocation-information.patch [bz#1738839] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1738839] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1738839] +- Resolves: bz#1738839 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744415 + (Backport support for count cache flush Spectre v2 mitigation [slow train]) +- Resolves: bz#1747185 + ("filtered-features" QOM property is not available) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 2.12.0-85.el8 +- kvm-console-Avoid-segfault-in-screendump.patch [bz#1684383] +- kvm-usb-hub-clear-suspend-on-detach.patch [bz#1619661] +- kvm-qemu-img-fix-regression-copying-secrets-during-conve.patch [bz#1727821] +- Resolves: bz#1619661 + (the attach hub on one hub still exits in device manager after unhotplug) +- Resolves: bz#1684383 + (qemu crashed when take screenshot for 2nd head of virtio video device if the display not opened by virt-viewer) +- Resolves: bz#1727821 + (Failed to convert a source image to the qcow2 image encrypted by luks) + +* Fri Aug 16 2019 Danilo Cesar Lemes de Paula - 2.12.0-84.el8 +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1727033] +- kvm-block-backend-Make-blk_inc-dec_in_flight-public.patch [bz#1716349] +- kvm-virtio-blk-Increase-in_flight-for-request-restart-BH.patch [bz#1716349] +- kvm-block-Fix-AioContext-switch-for-drained-node.patch [bz#1716349] +- kvm-test-bdrv-drain-AioContext-switch-in-drained-section.patch [bz#1716349] +- kvm-block-Use-normal-drain-for-bdrv_set_aio_context.patch [bz#1716349] +- kvm-block-Fix-AioContext-switch-for-bs-drv-NULL.patch [bz#1716347] +- kvm-iothread-fix-crash-with-invalid-properties.patch [bz#1687541] +- kvm-iothread-replace-init_done_cond-with-a-semaphore.patch [bz#1687541] +- 
kvm-RHEL-disable-hostmem-memfd.patch [bz#1740797] +- Resolves: bz#1687541 + (qemu aborted when start guest with a big iothreads) +- Resolves: bz#1716347 + (Qemu Core dump when quit vm that's in status "paused(io-error)" with data plane enabled) +- Resolves: bz#1716349 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1727033 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + +* Thu Aug 01 2019 Danilo Cesar Lemes de Paula - 2.12.0-83.el8 +- kvm-hw-block-pflash_cfi01-Add-missing-DeviceReset-handle.patch [bz#1707192] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1678979] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1678979] +- kvm-nbd-client-Lower-min_block-for-block-status-unaligne.patch [bz#1678979] +- kvm-nbd-client-Reject-inaccessible-tail-of-inconsistent-.patch [bz#1678979] +- kvm-nbd-client-Support-qemu-img-convert-from-unaligned-s.patch [bz#1678979] +- kvm-block-Add-bdrv_get_request_alignment.patch [bz#1678979] +- kvm-nbd-server-Advertise-actual-minimum-block-size.patch [bz#1678979] +- kvm-slirp-check-sscanf-result-when-emulating-ident.patch [bz#1727642] +- kvm-slirp-fix-big-little-endian-conversion-in-ident-prot.patch [bz#1727642] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1727642] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1727642] +- kvm-tap-set-vhostfd-passed-from-qemu-cli-to-non-blocking.patch [bz#1732642] +- kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch [bz#1734751] +- Resolves: bz#1678979 + (qemu-img convert abort when converting image with unaligned size (qemu-img: block/io.c:2134: bdrv_co_block_status: Assertion `*pnum && (((*pnum) % (align)) == 0) && align > offset - aligned_offset\' failed)) +- Resolves: bz#1707192 + (implement missing reset handler for cfi.pflash01 - slow train) +- Resolves: bz#1727642 + (CVE-2019-6778 
qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu()) +- Resolves: bz#1732642 + (enable the virtio-net frontend to work with the vhost-net backend in SEV guests) +- Resolves: bz#1734751 + (CVE-2019-14378 qemu-kvm: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-8.1.0]) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-82.el8 +- kvm-i386-Add-new-model-of-Cascadelake-Server.patch [bz#1629906] +- kvm-i386-Update-stepping-of-Cascadelake-Server.patch [bz#1629906] +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1629906] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-NEW.patch [bz#1629906] +- kvm-i386-Disable-OSPKE-on-CPU-model-definitions-NEW.patch [bz#1629906] +- kvm-block-ssh-Convert-from-DPRINTF-macro-to-trace-events.patch [bz#1513367] +- kvm-block-ssh-Do-not-report-read-write-flush-errors-to-t.patch [bz#1513367] +- kvm-qemu-iotests-Fix-paths-for-NFS.patch [bz#1513367] +- kvm-qemu-iotests-Filter-NFS-paths.patch [bz#1513367] +- kvm-iotests-Filter-SSH-paths.patch [bz#1513367] +- kvm-block-ssh-Implement-.bdrv_refresh_filename.patch [bz#1513367] +- kvm-iotests-Use-Python-byte-strings-where-appropriate.patch [bz#1513367] +- kvm-iotests-Unify-log-outputs-between-Python-2-and-3.patch [bz#1513367] +- kvm-ssh-switch-from-libssh2-to-libssh.patch [bz#1513367] +- kvm-redhat-switch-from-libssh2-to-libssh.patch [bz#1513367] +- kvm-block-gluster-limit-the-transfer-size-to-512-MiB.patch [bz#1728657] +- kvm-s390-cpumodel-fix-description-for-the-new-vector-fac.patch [bz#1729975] +- kvm-s390x-cpumodel-remove-esort-from-the-default-model.patch [bz#1729975] +- kvm-s390x-cpumodel-also-change-name-of-vxbeh.patch [bz#1729975] +- kvm-s390x-cpumodel-change-internal-name-of-vxpdeh-to-mat.patch [bz#1729975] +- kvm-target-i386-sev-Do-not-unpin-ram-device-memory-regio.patch [bz#1728958] +- kvm-i386-Save-EFER-for-32-bit-targets.patch [bz#1689269] +- kvm-target-i386-rename-HF_SVMI_MASK-to-HF_GUEST_MASK.patch [bz#1689269] +- 
kvm-target-i386-kvm-add-VMX-migration-blocker.patch [bz#1689269] +- kvm-target-i386-kvm-just-return-after-migrate_add_blocke.patch [bz#1689269] +- kvm-target-i386-kvm-Delete-VMX-migration-blocker-on-vCPU.patch [bz#1689269] +- kvm-Introduce-kvm_arch_destroy_vcpu.patch [bz#1689269] +- kvm-target-i386-kvm-Use-symbolic-constant-for-DB-BP-exce.patch [bz#1689269] +- kvm-target-i386-kvm-Re-inject-DB-to-guest-with-updated-D.patch [bz#1689269] +- kvm-target-i386-kvm-Block-migration-for-vCPUs-exposed-wi.patch [bz#1689269] +- kvm-target-i386-kvm-do-not-initialize-padding-fields.patch [bz#1689269] +- kvm-linux-headers-synchronize-generic-and-x86-KVM-header.patch [bz#1689269] +- kvm-vmstate-Add-support-for-kernel-integer-types.patch [bz#1689269] +- kvm-target-i386-kvm-Add-support-for-save-and-restore-nes.patch [bz#1689269] +- kvm-target-i386-kvm-Add-support-for-KVM_CAP_EXCEPTION_PA.patch [bz#1689269] +- kvm-target-i386-kvm-Add-nested-migration-blocker-only-wh.patch [bz#1689269] +- kvm-target-i386-kvm-Demand-nested-migration-kernel-capab.patch [bz#1689269] +- kvm-target-i386-skip-KVM_GET-SET_NESTED_STATE-if-VMX-dis.patch [bz#1689269] +- kvm-i386-kvm-Do-not-sync-nested-state-during-runtime.patch [bz#1689269] +- Resolves: bz#1513367 + (qemu with libssh) +- Resolves: bz#1629906 + ([Intel 8.1 Feat] qemu-kvm Introduce Cascade Lake (CLX) cpu model) +- Resolves: bz#1689269 + (Nested KVM: support for migration of nested hypervisors - Slow Train) +- Resolves: bz#1728657 + ('qemu-io write' to a raw image over libgfapi fails) +- Resolves: bz#1728958 + (Hot unplug vfio-pci NIC devices from sev guest will cause qemu-kvm: sev_ram_block_removed: failed to unregister region) +- Resolves: bz#1729975 + (RHEL 8.1 Pre-Beta - Fix for hardware CPU Model) + +* Mon Jul 08 2019 Miroslav Rezanina - 2.12.0-81.el8 +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1714792] +- kvm-virtio-gpu-pass-down-VirtIOGPU-pointer-to-a-bunch-of.patch [bz#1531543] +- kvm-virtio-gpu-add-iommu-support.patch [bz#1531543] +- 
kvm-virtio-gpu-fix-unmap-in-error-path.patch [bz#1531543] +- Resolves: bz#1531543 + ([RFE] add iommu support to virtio-gpu) +- Resolves: bz#1714792 + ([Intel 8.1 FEAT] MDS_NO exposure to guest) + +* Tue Jul 02 2019 Danilo Cesar Lemes de Paula - 2.12.0-80.el8 +- kvm-qxl-check-release-info-object.patch [bz#1712705] +- kvm-iotests-Make-182-do-without-device_add.patch [bz#1707598] +- Resolves: bz#1707598 + (qemu-iotest 182 fails without device hotplugging support) +- Resolves: bz#1712705 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-8]) + +* Fri Jun 28 2019 Danilo de Paula - 15:2.12.0-79 +- Rebuild all virt packages to fix RHEL's upgrade path +- Resolves: rhbz#1695587 + (Ensure modular RPM upgrade path) + +* Thu Jun 20 2019 Miroslav Rezanina - 2.12.0-78.el8 +- kvm-gluster-Handle-changed-glfs_ftruncate-signature.patch [bz#1721983] +- kvm-gluster-the-glfs_io_cbk-callback-function-pointer-ad.patch [bz#1721983] +- Resolves: bz#1721983 + (qemu-kvm can't be build with new gluster version (6.0.6)) + +* Thu Jun 13 2019 Danilo Cesar Lemes de Paula - 2.12.0-77.el8 +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1709970] +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1710662] +- kvm-linux-headers-Update-for-NVLink2-passthrough-downstr.patch [bz#1710662] +- kvm-pci-Move-NVIDIA-vendor-id-to-the-rest-of-ids.patch [bz#1710662] +- kvm-vfio-quirks-Add-common-quirk-alloc-helper.patch [bz#1710662] +- kvm-vfio-Make-vfio_get_region_info_cap-public.patch [bz#1710662] +- kvm-spapr-Support-NVIDIA-V100-GPU-with-NVLink2.patch [bz#1710662] +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1719578] +- Resolves: bz#1709970 + ([Intel 8.1 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM - qemu-kvm) +- Resolves: bz#1710662 + ([IBM 8.1 FEAT] POWER9 - Virt: qemu: NVLink2 passthru to guest - Nvidia Volta (GPU) (kvm)) +- Resolves: bz#1719578 + (VM failed to start with error "failed to install 
seccomp syscall filter in the kernel") + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-76.el8 +- kvm-Introduce-new-no_guest_reset-parameter-for-usb-host-.patch [bz#1713677] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713677] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713677] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713677] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1673396 bz#1673401] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1673396 bz#1673401] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1673396 bz#1673401] +- kvm-Disable-VXHS-support.patch [bz#1714933] +- Resolves: bz#1673396 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1673401 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1713677 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1714933 + (Disable VXHS in qemu-kvm) + +* Fri May 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-75.el8 +- kvm-s390x-cpumodel-enum-type-S390FeatGroup-now-gets-gene.patch [bz#1660912] +- kvm-linux-headers-update-against-Linux-5.2-rc1.patch [bz#1660912] +- kvm-s390x-cpumodel-ignore-csske-for-expansion.patch [bz#1660912] +- kvm-s390x-cpumodel-Miscellaneous-Instruction-Extensions-.patch [bz#1660912] +- kvm-s390x-cpumodel-msa9-facility.patch [bz#1660912] +- kvm-s390x-cpumodel-vector-enhancements.patch [bz#1660912] +- kvm-s390x-cpumodel-enhanced-sort-facility.patch [bz#1660912] +- kvm-s390x-cpumodel-add-Deflate-conversion-facility.patch [bz#1660912] +- kvm-s390x-cpumodel-add-gen15-defintions.patch [bz#1660912] +- kvm-s390x-cpumodel-wire-up-8561-and-8562-as-gen15-machin.patch [bz#1660912] +- kvm-spice-set-device-address-and-device-display-ID-in-QX.patch [bz#1712946] +- kvm-hw-pci-Add-missing-include.patch [bz#1712946] +- Resolves: bz#1660912 + 
([IBM 8.1 FEAT] KVM s390x: Add hardware CPU Model - qemu part) +- Resolves: bz#1712946 + (qemu-kvm build is broken due to spice_qxl_set_max_monitors being deprecated) + +* Mon May 20 2019 Danilo Cesar Lemes de Paula - 2.12.0-74.el8 +- kvm-x86-cpu-Enable-CLDEMOTE-Demote-Cache-Line-cpu-featur.patch [bz#1696436] +- kvm-memory-Fix-the-memory-region-type-assignment-order.patch [bz#1667249] +- kvm-target-i386-sev-Do-not-pin-the-ram-device-memory-reg.patch [bz#1667249] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673010] +- kvm-target-i386-define-md-clear-bit.patch [bz#1703302 bz#1703308] +- Resolves: bz#1667249 + (Fail to launch AMD SEV VM with assigned PCI device) +- Resolves: bz#1673010 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) +- Resolves: bz#1696436 + ([Intel 8.0 Feat] KVM Enabling SnowRidge new NIs - qemu-kvm) +- Resolves: bz#1703302 + (CVE-2018-12130 virt:rhel/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-8]) +- Resolves: bz#1703308 + (CVE-2018-12127 virt:rhel/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-8]) + +* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-73.el8 +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1561761] +- kvm-i386-Disable-OSPKE-on-CPU-model-definitions.patch [bz#1561761] +- Resolves: bz#1561761 + ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) + +* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-72.el8 +- kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch [bz#1707706] +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1707706] +- Resolves: bz#1707706 + (/builddir/build/BUILD/qemu-2.12.0/target/i386/kvm.c:2031: kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed.) 
+ +* Wed May 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-71.el8 +- kvm-s390-bios-Skip-bootmap-signature-entries.patch [bz#1683275] +- Resolves: bz#1683275 + ([IBM 8.1 FEAT] KVM: Secure Linux Boot Toleration (qemu)) + +* Tue May 07 2019 Danilo Cesar Lemes de Paula - 2.12.0-70.el8 +- kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-for-WBNOINVD.patch [bz#1561761] +- kvm-i386-Add-new-CPU-model-Icelake-Server-Client.patch [bz#1561761] +- kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch [bz#1561761] +- kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch [bz#1561761] +- kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch [bz#1561761] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1561761] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] +- Resolves: bz#1561761 + ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) + +* Fri May 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-69.el8 +- kvm-tests-crypto-Use-the-IEC-binary-prefix-definitions.patch [bz#1680231] +- kvm-crypto-expand-algorithm-coverage-for-cipher-benchmar.patch [bz#1680231] +- kvm-crypto-remove-code-duplication-in-tweak-encrypt-decr.patch [bz#1680231] +- kvm-crypto-introduce-a-xts_uint128-data-type.patch [bz#1680231] +- kvm-crypto-convert-xts_tweak_encdec-to-use-xts_uint128-t.patch [bz#1680231] +- kvm-crypto-convert-xts_mult_x-to-use-xts_uint128-type.patch [bz#1680231] +- kvm-crypto-annotate-xts_tweak_encdec-as-inlineable.patch [bz#1680231] +- kvm-crypto-refactor-XTS-cipher-mode-test-suite.patch [bz#1680231] +- kvm-crypto-add-testing-for-unaligned-buffers-with-XTS-ci.patch [bz#1680231] +- Resolves: bz#1680231 + (severe performance impact using luks format) + +* Mon Apr 29 2019 Danilo Cesar Lemes de Paula - 2.12.0-68.el8 +- 
kvm-s390x-ipl-Try-to-detect-Linux-vs-non-Linux-for-initi.patch [bz#1699070] +- kvm-loader-Check-access-size-when-calling-rom_ptr-to-avo.patch [bz#1699070] +- kvm-hw-s390x-Use-the-IEC-binary-prefix-definitions.patch [bz#1699070] +- kvm-s390x-storage-attributes-fix-CMMA_BLOCK_SIZE-usage.patch [bz#1699070] +- kvm-s390x-cpumodel-fix-segmentation-fault-when-baselinin.patch [bz#1699070] +- kvm-hw-s390x-s390-pci-bus-Convert-sysbus-init-function-t.patch [bz#1699070] +- kvm-s390x-pci-properly-fail-if-the-zPCI-device-cannot-be.patch [bz#1699070] +- kvm-s390x-pci-rename-hotplug-handler-callbacks.patch [bz#1699070] +- kvm-s390-avoid-potential-null-dereference-in-s390_pcihos.patch [bz#1699070] +- kvm-s390x-pci-Send-correct-event-on-hotplug.patch [bz#1699070] +- kvm-s390x-pci-Set-the-iommu-region-size-mpcifc-request.patch [bz#1699070] +- kvm-s390x-pci-Always-delete-and-free-the-release_timer.patch [bz#1699070] +- kvm-s390x-pci-Ignore-the-unplug-call-if-we-already-have-.patch [bz#1699070] +- kvm-s390x-pci-Use-hotplug_dev-instead-of-looking-up-the-.patch [bz#1699070] +- kvm-s390x-pci-Move-some-hotplug-checks-to-the-pre_plug-h.patch [bz#1699070] +- kvm-s390x-pci-Introduce-unplug-requests-and-split-unplug.patch [bz#1699070] +- kvm-s390x-pci-Drop-release-timer-and-replace-it-with-a-f.patch [bz#1699070] +- kvm-s390x-pci-mark-zpci-devices-as-unmigratable.patch [bz#1699070] +- kvm-s390x-pci-Fix-primary-bus-number-for-PCI-bridges.patch [bz#1699070] +- kvm-s390x-pci-Fix-hotplugging-of-PCI-bridges.patch [bz#1699070] +- kvm-s390x-pci-Warn-when-adding-PCI-devices-without-the-z.patch [bz#1699070] +- kvm-s390x-pci-Unplug-remaining-requested-devices-on-pcih.patch [bz#1699070] +- kvm-s390x-refactor-reset-reipl-handling.patch [bz#1699070] +- kvm-s390-ipl-fix-ipl-with-no-reboot.patch [bz#1699070] +- Resolves: bz#1699070 + (Backport s390x-related fixes for qemu-kvm) + +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-67.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch 
[bz#1693116] +- Resolves: bz#1693116 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-8.0]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 2.12.0-66.el8 +- kvm-iotests-153-Fix-dead-code.patch [bz#1694148] +- kvm-file-posix-Include-filename-in-locking-error-message.patch [bz#1694148] +- kvm-file-posix-Skip-effectiveless-OFD-lock-operations.patch [bz#1694148] +- kvm-file-posix-Drop-s-lock_fd.patch [bz#1694148] +- kvm-tests-Add-unit-tests-for-image-locking.patch [bz#1694148] +- kvm-file-posix-Fix-shared-locks-on-reopen-commit.patch [bz#1694148] +- kvm-iotests-Test-file-posix-locking-and-reopen.patch [bz#1694148] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1694148] +- kvm-hostmem-file-remove-object-id-from-pmem-error-messag.patch [bz#1687596] +- kvm-redhat-setting-target-release-to-rhel-8.1.0.patch [] +- kvm-redhat-removing-iotest-182.patch [] +- Resolves: bz#1687596 + ([Intel 8.1 BUG][KVM][Crystal Ridge]object_get_canonical_path_component: assertion failed: (obj->parent != NULL)) +- Resolves: bz#1694148 + (QEMU image locking needn't double open fd number, and it should not fail when attempting to release locks) + +* Tue Apr 09 2019 Danilo Cesar Lemes de Paula - 2.12.0-65.el8 +- kvm-s390x-cpumodel-mepochptff-warn-when-no-mepoch-and-re.patch [bz#1664371] +- kvm-s390x-cpumodel-add-z14-GA2-model.patch [bz#1664371] +- kvm-redhat-s390x-cpumodel-enable-mepoch-by-default-for-z.patch [bz#1664371] +- kvm-intel_iommu-fix-operator-in-vtd_switch_address_space.patch [bz#1662272] +- kvm-intel_iommu-reset-intr_enabled-when-system-reset.patch [bz#1662272] +- kvm-pci-msi-export-msi_is_masked.patch [bz#1662272] +- kvm-i386-kvm-ignore-masked-irqs-when-update-msi-routes.patch [bz#1662272] +- Resolves: bz#1662272 + (Boot guest with device assignment+vIOMMU, qemu prompts "vtd_interrupt_remap_msi: MSI address low 32 bit invalid: 0x0" when first rebooting guest) +- Resolves: bz#1664371 + ([IBM 8.1 FEAT] Update 
hardware CPU Model z14 (kvm) - qemu part) + +* Mon Apr 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-64.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1645411] +- kvm-Increase-number-of-iotests-being-run-as-a-part-of-RH.patch [bz#1664463] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-qemu-kvm.spec.template-Update-pyton-path-to-system-i.patch [] +- Resolves: bz#1645411 + (the "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong) +- Resolves: bz#1664463 + (Modify iotest behavior to include luks and nbd and fail build if iotests fail) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 2.12.0-63.el8 +- kvm-scsi-generic-avoid-possible-out-of-bounds-access-to-.patch [bz#1668162] +- Resolves: bz#1668162 + (CVE-2019-6501 qemu-kvm: QEMU: scsi-generic: possible OOB access while handling inquiry request [rhel-8]) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 2.12.0-62.el8 +- kvm-slirp-check-data-length-while-emulating-ident-functi.patch [bz#1669069] +- Resolves: bz#1669069 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-8.0]) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-61.el8 +- kvm-qemu-ga-make-get-fsinfo-work-over-pci-bridges.patch [bz#1666952] +- kvm-qga-fix-driver-leak-in-guest-get-fsinfo.patch [bz#1666952] +- Resolves: bz#1666952 + (qemu-guest-agent does not parse PCI bridge links in "build_guest_fsinfo_for_real_device" (q35)) + +* Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 2.12.0-60.el8 +- kvm-ne2000-fix-possible-out-of-bound-access-in-ne2000_re.patch [bz#1636784] +- kvm-rtl8139-fix-possible-out-of-bound-access.patch [bz#1636784] +- kvm-pcnet-fix-possible-buffer-overflow.patch [bz#1636784] +- 
kvm-net-ignore-packet-size-greater-than-INT_MAX.patch [bz#1636784] +- kvm-net-drop-too-large-packet-early.patch [bz#1636784] +- kvm-net-hub-suppress-warnings-of-no-host-network-for-qte.patch [bz#1636784] +- kvm-virtio-net-test-accept-variable-length-argument-in-p.patch [bz#1636784] +- kvm-virtio-net-test-remove-unused-macro.patch [bz#1636784] +- kvm-virtio-net-test-add-large-tx-buffer-test.patch [bz#1636784] +- kvm-s390x-Return-specification-exception-for-unimplement.patch [bz#1668261] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1665844] +- Resolves: bz#1636784 + (CVE-2018-17963 qemu-kvm: Qemu: net: ignore packets with large size [rhel-8]) +- Resolves: bz#1665844 + (Guest quit with error when hotunplug cpu) +- Resolves: bz#1668261 + ([RHEL8] Backport diag308 stable exception fix (qemu-kvm)) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-59.el8 +- kvm-hw-scsi-cleanups-before-VPD-BL-emulation.patch [bz#1639957] +- kvm-hw-scsi-centralize-SG_IO-calls-into-single-function.patch [bz#1639957] +- kvm-hw-scsi-add-VPD-Block-Limits-emulation.patch [bz#1639957] +- kvm-scsi-disk-Block-Device-Characteristics-emulation-fix.patch [bz#1639957] +- kvm-scsi-generic-keep-VPD-page-list-sorted.patch [bz#1639957] +- kvm-scsi-generic-avoid-out-of-bounds-access-to-VPD-page-.patch [bz#1639957] +- kvm-scsi-generic-avoid-invalid-access-to-struct-when-emu.patch [bz#1639957] +- kvm-scsi-generic-do-not-do-VPD-emulation-for-sense-other.patch [bz#1639957] +- Resolves: bz#1639957 + ([RHEL.8] scsi host device passthrough limits IO writes - slow train) + +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 2.12.0-58.el8 +- kvm-block-Update-flags-in-bdrv_set_read_only.patch [bz#1644996] +- kvm-block-Add-auto-read-only-option.patch [bz#1644996] +- kvm-rbd-Close-image-in-qemu_rbd_open-error-path.patch [bz#1644996] +- kvm-block-Require-auto-read-only-for-existing-fallbacks.patch [bz#1644996] +- kvm-nbd-Support-auto-read-only-option.patch [bz#1644996] +- 
kvm-file-posix-Support-auto-read-only-option.patch [bz#1644996] +- kvm-curl-Support-auto-read-only-option.patch [bz#1644996] +- kvm-gluster-Support-auto-read-only-option.patch [bz#1644996] +- kvm-iscsi-Support-auto-read-only-option.patch [bz#1644996] +- kvm-block-Make-auto-read-only-on-default-for-drive.patch [bz#1644996] +- kvm-qemu-iotests-Test-auto-read-only-with-drive-and-bloc.patch [bz#1644996] +- kvm-block-Fix-update-of-BDRV_O_AUTO_RDONLY-in-update_fla.patch [bz#1644996] +- kvm-qemu-img-Add-C-option-for-convert-with-copy-offloadi.patch [bz#1623082] +- kvm-iotests-Add-test-for-qemu-img-convert-C-compatibilit.patch [bz#1623082] +- Resolves: bz#1623082 + ([rhel.8.0]Target files for 'qemu-img convert' do not support thin_provisoning with iscsi/nfs backend) +- Resolves: bz#1644996 + (block-commit can't be used with -blockdev) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-57.el8 +- kvm-qemu-kvm.spec.template-Update-files-for-tests-rpm-to.patch [bz#1601107] + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-56.el8 +- kvm-Run-iotests-as-part-of-the-build-process.patch [bz#1661026] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1601107] +- Resolves: bz#1601107 + (qemu-kvm packaging: make running qemu-iotests more robust) +- Resolves: bz#1661026 + (Run iotests as part of build process) + +* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-55.el8 +- kvm-block-Don-t-inactivate-children-before-parents.patch [bz#1659395] +- kvm-iotests-Test-migration-with-blockdev.patch [bz#1659395] +- Resolves: bz#1659395 + (src qemu core dump when do migration ( block device node-name changed after change cdrom) - Slow Train) + +* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-54.el8 +- kvm-s390x-tcg-avoid-overflows-in-time2tod-tod2time.patch [bz#1653569] +- kvm-s390x-kvm-pass-values-instead-of-pointers-to-kvm_s39.patch [bz#1653569] +- kvm-s390x-tod-factor-out-TOD-into-separate-device.patch [bz#1653569] +- kvm-s390x-tcg-drop-tod_basetime.patch [bz#1653569] 
+- kvm-s390x-tcg-properly-implement-the-TOD.patch [bz#1653569] +- kvm-s390x-tcg-SET-CLOCK-COMPARATOR-can-clear-CKC-interru.patch [bz#1653569] +- kvm-s390x-tcg-implement-SET-CLOCK.patch [bz#1653569] +- kvm-s390x-tcg-rearm-the-CKC-timer-during-migration.patch [bz#1653569] +- kvm-s390x-tcg-fix-locking-problem-with-tcg_s390_tod_upda.patch [bz#1653569] +- kvm-hw-s390x-Include-the-tod-qemu-also-for-builds-with-d.patch [bz#1653569] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1653569] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1653569] +- kvm-migration-discard-non-migratable-RAMBlocks.patch [bz#1539285] +- kvm-vfio-pci-do-not-set-the-PCIDevice-has_rom-attribute.patch [bz#1539285] +- kvm-memory-exec-Expose-all-memory-block-related-flags.patch [bz#1539285] +- kvm-memory-exec-switch-file-ram-allocation-functions-to-.patch [bz#1539285] +- kvm-configure-add-libpmem-support.patch [bz#1539285] +- kvm-hostmem-file-add-the-pmem-option.patch [bz#1539285] +- kvm-mem-nvdimm-ensure-write-persistence-to-PMEM-in-label.patch [bz#1539285] +- kvm-migration-ram-Add-check-and-info-message-to-nvdimm-p.patch [bz#1539285] +- kvm-migration-ram-ensure-write-persistence-on-loading-al.patch [bz#1539285] +- Resolves: bz#1539285 + ([Intel 8.0 Bug] [KVM][Crystal Ridge] Lack of data persistence guarantee of QEMU writes to host PMEM) +- Resolves: bz#1653569 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) + +* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-53.el8 +- kvm-ui-add-qapi-parser-for-display.patch [bz#1652871] +- kvm-ui-switch-trivial-displays-to-qapi-parser.patch [bz#1652871] +- kvm-qapi-Add-rendernode-display-option-for-egl-headless.patch [bz#1652871] +- kvm-ui-Allow-specifying-rendernode-display-option-for-eg.patch [bz#1652871] +- kvm-qapi-add-query-display-options-command.patch [bz#1652871] +- Resolves: bz#1652871 + (QEMU doesn't expose rendernode option for egl-headless display type) + +* Fri Jan 04 2019 
Danilo Cesar Lemes de Paula - 2.12.0-52.el8 +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] +- Resolves: bz#1654276 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Mon Dec 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-51.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1598284] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1598284] +- kvm-i386-do-not-migrate-MSR_SMI_COUNT-on-machine-types-2.patch [bz#1659565] +- kvm-pc-x-migrate-smi-count-to-PC_RHEL_COMPAT.patch [bz#1659565] +- kvm-slow-train-kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF.patch [bz#1656829] +- Resolves: bz#1598284 + ([Intel 8.0 Alpha] physical bits should < 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) +- Resolves: bz#1656829 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1659565 + (machine type: required compat flag x-migrate-smi-count=off) + +* Tue Dec 18 2018 Danilo Cesar Lemes de Paula - 2.12.0-51 +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] +- Resolves: bz#1654276 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Mon Dec 17 2018 Danilo Cesar Lemes de Paula - +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1654486] +- Resolves: bz#1654486 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) + +* Fri Dec 14 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-48 +- kvm-redhat-use-autopatch-instead-of-PATCHAPPLY.patch [bz#1613128] +- kvm-redhat-Removing-some-unused-build-flags-in-the-spec-.patch [bz#1613128] +- kvm-redhat-Fixing-rhev-ma-conflicts.patch [bz#1613126] +- kvm-redhat-Remove-_smp_mflags-cleanup-workaround-for-s39.patch [bz#1613128] +- kvm-redhat-Removing-dead-code-from-the-spec-file.patch [bz#1613128] +- kvm-i386-Add-stibp-flag-name.patch [bz#1639446] +- kvm-Add-functional-acceptance-tests-infrastructure.patch [bz#1655807] +- 
kvm-scripts-qemu.py-allow-adding-to-the-list-of-extra-ar.patch [bz#1655807] +- kvm-Acceptance-tests-add-quick-VNC-tests.patch [bz#1655807] +- kvm-scripts-qemu.py-introduce-set_console-method.patch [bz#1655807] +- kvm-Acceptance-tests-add-Linux-kernel-boot-and-console-c.patch [bz#1655807] +- kvm-Bootstrap-Python-venv-for-tests.patch [bz#1655807] +- kvm-Acceptance-tests-add-make-rule-for-running-them.patch [bz#1655807] +- Resolves: bz#1613126 + (Check and fix qemu-kvm-rhev and qemu-kvm-ma conflicts in qemu-kvm for rhel-8) +- Resolves: bz#1613128 + (Spec file clean up) +- Resolves: bz#1639446 + (Cross migration from RHEL7.5 to RHEL8 shouldn't fail with cpu flag stibp [qemu-kvm]) +- Resolves: bz#1655807 + (Backport avocado-qemu tests for QEMU 2.12) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +- kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- 
kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + 
(Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-44 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch [bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- 
kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) + +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 +- kvm-luks-Allow-share-rw-on.patch 
[bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json format of 7.6 is incompatible with 7.5) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- 
kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- 
kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch [bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 + (Crashes and hangs with iothreads vs. block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-39.el8 +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- 
kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: bz#1450712 + (Booting nested guest with vIOMMU, the assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-36.el8 +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch 
[bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver packages lack all the library Requires) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- 
kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch [bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- 
Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) +- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Tue Sep 04 2018 Danilo Cesar Lemes de Paula - 2.12.0-28.el8 +- kvm-e1000e-Do-not-auto-clear-ICR-bits-which-aren-t-set-i.patch [bz#1596024] +- kvm-e1000e-Prevent-MSI-MSI-X-storms.patch [bz#1596024] +- kvm-Drop-build_configure.sh-and-Makefile.local-files.patch [] +- kvm-Fix-subject-line-in-.gitpublish.patch [] +- Resolves: bz#1596024 + (The network link can't be detected on guest when the guest uses e1000e model type) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) 
+ +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- 
kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- 
kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- 
kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- 
kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch 
[bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch [bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- 
kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch [bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] 
+- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch [bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- 
kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch [bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- 
kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- 
kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch [bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). 
Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 
3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a "drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: 
Assertion `skip_bytes < pnum' failed.) +- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- 
kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch +- 
kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el8 +- Fixing some issues with packaging.
+- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0