From fe09579d2f52d5677b2fb1cef3f9a1be895eb792 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 9 Jun 2025 17:42:49 -0400 Subject: [PATCH] * Mon Jun 09 2025 Jon Maloy - 9.1.0-23 - kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch [RHEL-95120] - kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch [RHEL-95408] - kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch [RHEL-95408] - Resolves: RHEL-95120 (Allow libvirt to restart passt/vhost-user when the process is killed [rhel-9.7]) - Resolves: RHEL-95408 (Support multipath failover with scsi-block [rhel-9]) --- ...le-posix-Define-DM_MPATH_PROBE_PATHS.patch | 42 ++++ ...-paths-and-retry-SG_IO-on-potential-.patch | 215 ++++++++++++++++++ ...dd-QAPI-events-to-report-connection-.patch | 133 +++++++++++ qemu-kvm.spec | 17 +- 4 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch create mode 100644 kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch create mode 100644 kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch diff --git a/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch b/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch new file mode 100644 index 0000000..08287d4 --- /dev/null +++ b/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch @@ -0,0 +1,42 @@ +From d565fe385b3c45a41fa8e25942220aff38a04fc3 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 29 Apr 2025 17:05:41 +0200 +Subject: [PATCH 2/3] file-posix: Define DM_MPATH_PROBE_PATHS + +RH-Author: Kevin Wolf +RH-MergeRequest: 372: file-posix: Fix multipath failover with SCSI passthrough [9.7] +RH-Jira: RHEL-95408 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 7615906833a6bb2b4645fa5cd60d78aa9631cb7c (kmwolf/centos-qemu-kvm) + +While the kernel side isn't merged yet and we're still using old kernel +headers, just define DM_MPATH_PROBE_PATHS manually. + +This is a downstream-only patch that can be removed after the next minor +release. + +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 0cb4e922c0..6a5c506549 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -134,6 +134,11 @@ + #define RAW_LOCK_PERM_BASE 100 + #define RAW_LOCK_SHARED_BASE 200 + ++/* TODO Remove this when the kernel side is merged */ ++#if !defined(DM_MPATH_PROBE_PATHS) && defined(DM_GET_TARGET_VERSION) ++#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1) ++#endif ++ + typedef struct BDRVRawState { + int fd; + bool use_lock; +-- +2.48.1 + diff --git a/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch b/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch new file mode 100644 index 0000000..bd716a1 --- /dev/null +++ b/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch @@ -0,0 +1,215 @@ +From 95c651ba1177bd88dbd9b52fe2ec8fedadcdb5c8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 22 May 2025 15:08:03 +0200 +Subject: [PATCH 3/3] file-posix: Probe paths and retry SG_IO on potential path + errors + +RH-Author: Kevin Wolf +RH-MergeRequest: 372: file-posix: Fix multipath failover with SCSI passthrough [9.7] +RH-Jira: RHEL-95408 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] 4312e9ec609e511afdfb6634e1d2370032d41543 (kmwolf/centos-qemu-kvm) + +When scsi-block is used on a host multipath device, it runs into the +problem that the kernel dm-mpath doesn't know anything about SCSI or +SG_IO and therefore can't decide if a SG_IO request returned an error +and needs to be retried on a different path. Instead of getting working +failover, an error is returned to scsi-block and handled according to +the configured error policy. Obviously, this is not what users want, +they want working failover. + +QEMU can parse the SG_IO result and determine whether this could have +been a path error, but just retrying the same request could just send it +to the same failing path again and result in the same error. + +With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath +block devices (queued in the device mapper tree for Linux 6.16), we can +tell the kernel to probe all paths and tell us if any usable paths +remained. If so, we can now retry the SG_IO ioctl and expect it to be +sent to a working path. + +Signed-off-by: Kevin Wolf +Message-ID: <20250522130803.34738-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit bf627788ef17721955bfcfba84209a07ae5f54ea) +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 115 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 114 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 6a5c506549..f17a3f4d10 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -41,6 +41,7 @@ + + #include "scsi/pr-manager.h" + #include "scsi/constants.h" ++#include "scsi/utils.h" + + #if defined(__APPLE__) && (__MACH__) + #include +@@ -72,6 +73,7 @@ + #include + #endif + #include ++#include + #include + #include + #include +@@ -139,6 +141,22 @@ + #define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1) + #endif + ++/* ++ * Multiple retries are mostly meant for two separate scenarios: ++ * ++ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another ++ * path goes down. ++ * ++ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have ++ * to send another SG_IO to switch to another path group to probe the paths in ++ * it. ++ * ++ * Even if each path is in a separate path group (path_grouping_policy set to ++ * failover), it's rare to have more than eight path groups - and even then ++ * pretty unlikely that only bad path groups would be chosen in eight retries. ++ */ ++#define SG_IO_MAX_RETRIES 8 ++ + typedef struct BDRVRawState { + int fd; + bool use_lock; +@@ -166,6 +184,7 @@ typedef struct BDRVRawState { + bool use_linux_aio:1; + bool has_laio_fdsync:1; + bool use_linux_io_uring:1; ++ bool use_mpath:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ + bool has_fallocate; + bool needs_alignment; +@@ -4248,15 +4267,105 @@ hdev_open_Mac_error: + /* Since this does ioctl the device must be already opened */ + bs->sg = hdev_is_sg(bs); + ++ /* sg devices aren't even block devices and can't use dm-mpath */ ++ s->use_mpath = !bs->sg; ++ + return ret; + } + + #if defined(__linux__) ++#if defined(DM_MPATH_PROBE_PATHS) ++static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr) ++{ ++ if (ret < 0) { ++ switch (ret) { ++ case -ENODEV: ++ return true; ++ case -EAGAIN: ++ /* ++ * The device is probably suspended. This happens while the dm table ++ * is reloaded, e.g. because a path is added or removed. This is an ++ * operation that should complete within 1ms, so just wait a bit and ++ * retry. ++ * ++ * If the device was suspended for another reason, we'll wait and ++ * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before ++ * we return an error and potentially stop the VM. ++ */ ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ if (io_hdr->host_status != SCSI_HOST_OK) { ++ return true; ++ } ++ ++ switch (io_hdr->status) { ++ case GOOD: ++ case CONDITION_GOOD: ++ case INTERMEDIATE_GOOD: ++ case INTERMEDIATE_C_GOOD: ++ case RESERVATION_CONFLICT: ++ case COMMAND_TERMINATED: ++ return false; ++ case CHECK_CONDITION: ++ return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp, ++ io_hdr->mx_sb_len); ++ default: ++ return true; ++ } ++} ++ ++static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) ++{ ++ BDRVRawState *s = acb->bs->opaque; ++ RawPosixAIOData probe_acb; ++ ++ if (!s->use_mpath) { ++ return false; ++ } ++ ++ if (!sgio_path_error(ret, acb->ioctl.buf)) { ++ return false; ++ } ++ ++ probe_acb = (RawPosixAIOData) { ++ .bs = acb->bs, ++ .aio_type = QEMU_AIO_IOCTL, ++ .aio_fildes = s->fd, ++ .aio_offset = 0, ++ .ioctl = { ++ .buf = NULL, ++ .cmd = DM_MPATH_PROBE_PATHS, ++ }, ++ }; ++ ++ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb); ++ if (ret == -ENOTTY) { ++ s->use_mpath = false; ++ } else if (ret == -EAGAIN) { ++ /* The device might be suspended for a table reload, worth retrying */ ++ return true; ++ } ++ ++ return ret == 0; ++} ++#else ++static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) ++{ ++ return false; ++} ++#endif /* DM_MPATH_PROBE_PATHS */ ++ + static int coroutine_fn + hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) + { + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; ++ int retries = SG_IO_MAX_RETRIES; + int ret; + + ret = fd_open(bs); +@@ -4284,7 +4393,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) + }, + }; + +- return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); ++ do { ++ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb); ++ } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)); ++ ++ return ret; + } + #endif /* linux */ + +-- +2.48.1 + diff --git a/kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch b/kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch new file mode 100644 index 0000000..a792e7c --- /dev/null +++ b/kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch @@ -0,0 +1,133 @@ +From b6de1e19ba778547e92997c6cad77d7cf755c78b Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 17 Feb 2025 10:25:50 +0100 +Subject: [PATCH 1/3] net: vhost-user: add QAPI events to report connection + state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 371: net: vhost-user: add QAPI events to report connection state +RH-Jira: RHEL-95120 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Commit: [1/1] c8f65026e3548891fe713a1622438388e285dbf3 (lvivier/qemu-kvm-centos) + +The netdev reports NETDEV_VHOST_USER_CONNECTED event when +the chardev is connected, and NETDEV_VHOST_USER_DISCONNECTED +when it is disconnected. + +The NETDEV_VHOST_USER_CONNECTED event includes the chardev id. + +This allows a system manager like libvirt to detect when the server +fails. + +For instance with passt: + +{ 'execute': 'qmp_capabilities' } +{ "return": { } } + +[killing passt here] + +{ "timestamp": { "seconds": 1739538634, "microseconds": 920450 }, + "event": "NETDEV_VHOST_USER_DISCONNECTED", + "data": { "netdev-id": "netdev0" } } + +[automatic reconnection with reconnect-ms] + +{ "timestamp": { "seconds": 1739538638, "microseconds": 354181 }, + "event": "NETDEV_VHOST_USER_CONNECTED", + "data": { "netdev-id": "netdev0", "chardev-id": "chr0" } } + +Tested-by: Stefano Brivio +Signed-off-by: Laurent Vivier +Message-Id: <20250217092550.1172055-1-lvivier@redhat.com> +Acked-by: Markus Armbruster +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 02fd9f8aeeb184276b283ae2f404bc3acf1e7b7a) +--- + net/vhost-user.c | 3 +++ + qapi/net.json | 40 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 43 insertions(+) + +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 12555518e8..0b235e50c6 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -16,6 +16,7 @@ + #include "chardev/char-fe.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-net.h" ++#include "qapi/qapi-events-net.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" + #include "qemu/option.h" +@@ -271,6 +272,7 @@ static void chr_closed_bh(void *opaque) + if (err) { + error_report_err(err); + } ++ qapi_event_send_netdev_vhost_user_disconnected(name); + } + + static void net_vhost_user_event(void *opaque, QEMUChrEvent event) +@@ -300,6 +302,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) + net_vhost_user_watch, s); + qmp_set_link(name, true, &err); + s->started = true; ++ qapi_event_send_netdev_vhost_user_connected(name, chr->label); + break; + case CHR_EVENT_CLOSED: + /* a close event may happen during a read/write, but vhost +diff --git a/qapi/net.json b/qapi/net.json +index 87fc0d0b28..7bd1eaa1ba 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -1020,3 +1020,43 @@ + ## + { 'event': 'NETDEV_STREAM_DISCONNECTED', + 'data': { 'netdev-id': 'str' } } ++ ++## ++# @NETDEV_VHOST_USER_CONNECTED: ++# ++# Emitted when the vhost-user chardev is connected ++# ++# @netdev-id: QEMU netdev id that is connected ++# ++# @chardev-id: The character device id used by the QEMU netdev ++# ++# Since: 10.0 ++# ++# .. qmp-example:: ++# ++# <- { "timestamp": {"seconds": 1739538638, "microseconds": 354181 }, ++# "event": "NETDEV_VHOST_USER_CONNECTED", ++# "data": { "netdev-id": "netdev0", "chardev-id": "chr0" } } ++# ++## ++{ 'event': 'NETDEV_VHOST_USER_CONNECTED', ++ 'data': { 'netdev-id': 'str', 'chardev-id': 'str' } } ++ ++## ++# @NETDEV_VHOST_USER_DISCONNECTED: ++# ++# Emitted when the vhost-user chardev is disconnected ++# ++# @netdev-id: QEMU netdev id that is disconnected ++# ++# Since: 10.0 ++# ++# .. qmp-example:: ++# ++# <- { "timestamp": { "seconds": 1739538634, "microseconds": 920450 }, ++# "event": "NETDEV_VHOST_USER_DISCONNECTED", ++# "data": { "netdev-id": "netdev0" } } ++# ++## ++{ 'event': 'NETDEV_VHOST_USER_DISCONNECTED', ++ 'data': { 'netdev-id': 'str' } } +-- +2.48.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index da45340..c03728c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 9.1.0 -Release: 22%{?rcrel}%{?dist}%{?cc_suffix} +Release: 23%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -569,6 +569,12 @@ Patch198: kvm-vfio-helpers-Refactor-vfio_region_mmap-error-handlin.patch Patch199: kvm-vfio-helpers-Align-mmaps.patch # For RHEL-85159 - Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration Patch200: kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch +# For RHEL-95120 - Allow libvirt to restart passt/vhost-user when the process is killed [rhel-9.7] +Patch201: kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch +# For RHEL-95408 - Support multipath failover with scsi-block [rhel-9] +Patch202: kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch +# For RHEL-95408 - Support multipath failover with scsi-block [rhel-9] +Patch203: kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch %if %{have_clang} BuildRequires: clang @@ -1644,6 +1650,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jun 09 2025 Jon Maloy - 9.1.0-23 +- kvm-net-vhost-user-add-QAPI-events-to-report-connection-.patch [RHEL-95120] +- kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch [RHEL-95408] +- kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch [RHEL-95408] +- Resolves: RHEL-95120 + (Allow libvirt to restart passt/vhost-user when the process is killed [rhel-9.7]) +- Resolves: RHEL-95408 + (Support multipath failover with scsi-block [rhel-9]) + * Mon Jun 02 2025 Jon Maloy - 9.1.0-22 - kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch [RHEL-85159] - kvm-Allow-guest-network-get-route-guest-get-load-QGA-com.patch [RHEL-91605 RHEL-91606]