- kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch [RHEL-65852] - kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch [RHEL-65852] - kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch [RHEL-67706] - kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch [RHEL-67706] - kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch [RHEL-67706] - kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch [RHEL-67706] - kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch [RHEL-67706] - kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch [RHEL-67706] - kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch [RHEL-67706] - Resolves: RHEL-65852 (Support multipath failover with scsi-block) - Resolves: RHEL-67706 (postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S')
216 lines
6.8 KiB
Diff
216 lines
6.8 KiB
Diff
From 35a2470f67cb38c52246974a853d843dbb80b84d Mon Sep 17 00:00:00 2001
|
|
From: Kevin Wolf <kwolf@redhat.com>
|
|
Date: Thu, 22 May 2025 15:08:03 +0200
|
|
Subject: [PATCH 2/9] file-posix: Probe paths and retry SG_IO on potential path
|
|
errors
|
|
|
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
RH-MergeRequest: 370: file-posix: Fix multipath failover with SCSI passthrough
|
|
RH-Jira: RHEL-65852
|
|
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
RH-Commit: [2/2] bbaa1d4de0675a2c67dafed74eacc0d1103aab18 (kmwolf/centos-qemu-kvm)
|
|
|
|
When scsi-block is used on a host multipath device, it runs into the
|
|
problem that the kernel dm-mpath doesn't know anything about SCSI or
|
|
SG_IO and therefore can't decide if a SG_IO request returned an error
|
|
and needs to be retried on a different path. Instead of getting working
|
|
failover, an error is returned to scsi-block and handled according to
|
|
the configured error policy. Obviously, this is not what users want;
|
|
they want working failover.
|
|
|
|
QEMU can parse the SG_IO result and determine whether this could have
|
|
been a path error, but simply retrying the same request could send it
|
|
to the same failing path again and result in the same error.
|
|
|
|
With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath
|
|
block devices (queued in the device mapper tree for Linux 6.16), we can
|
|
tell the kernel to probe all paths and tell us if any usable paths
|
|
remained. If so, we can now retry the SG_IO ioctl and expect it to be
|
|
sent to a working path.
|
|
|
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
Message-ID: <20250522130803.34738-1-kwolf@redhat.com>
|
|
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
(cherry picked from commit bf627788ef17721955bfcfba84209a07ae5f54ea)
|
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
---
|
|
block/file-posix.c | 115 ++++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 114 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/block/file-posix.c b/block/file-posix.c
|
|
index 52cc25db84..77a35d9ae9 100644
|
|
--- a/block/file-posix.c
|
|
+++ b/block/file-posix.c
|
|
@@ -41,6 +41,7 @@
|
|
|
|
#include "scsi/pr-manager.h"
|
|
#include "scsi/constants.h"
|
|
+#include "scsi/utils.h"
|
|
|
|
#if defined(__APPLE__) && (__MACH__)
|
|
#include <sys/ioctl.h>
|
|
@@ -72,6 +73,7 @@
|
|
#include <linux/blkzoned.h>
|
|
#endif
|
|
#include <linux/cdrom.h>
|
|
+#include <linux/dm-ioctl.h>
|
|
#include <linux/fd.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/hdreg.h>
|
|
@@ -139,6 +141,22 @@
|
|
#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1)
|
|
#endif
|
|
|
|
+/*
|
|
+ * Multiple retries are mostly meant for two separate scenarios:
|
|
+ *
|
|
+ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another
|
|
+ * path goes down.
|
|
+ *
|
|
+ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have
|
|
+ * to send another SG_IO to switch to another path group to probe the paths in
|
|
+ * it.
|
|
+ *
|
|
+ * Even if each path is in a separate path group (path_grouping_policy set to
|
|
+ * failover), it's rare to have more than eight path groups - and even then
|
|
+ * pretty unlikely that only bad path groups would be chosen in eight retries.
|
|
+ */
|
|
+#define SG_IO_MAX_RETRIES 8
|
|
+
|
|
typedef struct BDRVRawState {
|
|
int fd;
|
|
bool use_lock;
|
|
@@ -166,6 +184,7 @@ typedef struct BDRVRawState {
|
|
bool use_linux_aio:1;
|
|
bool has_laio_fdsync:1;
|
|
bool use_linux_io_uring:1;
|
|
+ bool use_mpath:1;
|
|
int page_cache_inconsistent; /* errno from fdatasync failure */
|
|
bool has_fallocate;
|
|
bool needs_alignment;
|
|
@@ -4262,15 +4281,105 @@ hdev_open_Mac_error:
|
|
/* Since this does ioctl the device must be already opened */
|
|
bs->sg = hdev_is_sg(bs);
|
|
|
|
+ /* sg devices aren't even block devices and can't use dm-mpath */
|
|
+ s->use_mpath = !bs->sg;
|
|
+
|
|
return ret;
|
|
}
|
|
|
|
#if defined(__linux__)
|
|
+#if defined(DM_MPATH_PROBE_PATHS)
|
|
+static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr)
|
|
+{
|
|
+ if (ret < 0) {
|
|
+ switch (ret) {
|
|
+ case -ENODEV:
|
|
+ return true;
|
|
+ case -EAGAIN:
|
|
+ /*
|
|
+ * The device is probably suspended. This happens while the dm table
|
|
+ * is reloaded, e.g. because a path is added or removed. This is an
|
|
+ * operation that should complete within 1ms, so just wait a bit and
|
|
+ * retry.
|
|
+ *
|
|
+ * If the device was suspended for another reason, we'll wait and
|
|
+ * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before
|
|
+ * we return an error and potentially stop the VM.
|
|
+ */
|
|
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
|
|
+ return true;
|
|
+ default:
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (io_hdr->host_status != SCSI_HOST_OK) {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ switch (io_hdr->status) {
|
|
+ case GOOD:
|
|
+ case CONDITION_GOOD:
|
|
+ case INTERMEDIATE_GOOD:
|
|
+ case INTERMEDIATE_C_GOOD:
|
|
+ case RESERVATION_CONFLICT:
|
|
+ case COMMAND_TERMINATED:
|
|
+ return false;
|
|
+ case CHECK_CONDITION:
|
|
+ return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp,
|
|
+ io_hdr->mx_sb_len);
|
|
+ default:
|
|
+ return true;
|
|
+ }
|
|
+}
|
|
+
|
|
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
|
|
+{
|
|
+ BDRVRawState *s = acb->bs->opaque;
|
|
+ RawPosixAIOData probe_acb;
|
|
+
|
|
+ if (!s->use_mpath) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (!sgio_path_error(ret, acb->ioctl.buf)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ probe_acb = (RawPosixAIOData) {
|
|
+ .bs = acb->bs,
|
|
+ .aio_type = QEMU_AIO_IOCTL,
|
|
+ .aio_fildes = s->fd,
|
|
+ .aio_offset = 0,
|
|
+ .ioctl = {
|
|
+ .buf = NULL,
|
|
+ .cmd = DM_MPATH_PROBE_PATHS,
|
|
+ },
|
|
+ };
|
|
+
|
|
+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb);
|
|
+ if (ret == -ENOTTY) {
|
|
+ s->use_mpath = false;
|
|
+ } else if (ret == -EAGAIN) {
|
|
+ /* The device might be suspended for a table reload, worth retrying */
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ return ret == 0;
|
|
+}
|
|
+#else
|
|
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
|
|
+{
|
|
+ return false;
|
|
+}
|
|
+#endif /* DM_MPATH_PROBE_PATHS */
|
|
+
|
|
static int coroutine_fn
|
|
hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
|
|
{
|
|
BDRVRawState *s = bs->opaque;
|
|
RawPosixAIOData acb;
|
|
+ int retries = SG_IO_MAX_RETRIES;
|
|
int ret;
|
|
|
|
ret = fd_open(bs);
|
|
@@ -4298,7 +4407,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
|
|
},
|
|
};
|
|
|
|
- return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
|
|
+ do {
|
|
+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
|
|
+ } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret));
|
|
+
|
|
+ return ret;
|
|
}
|
|
#endif /* linux */
|
|
|
|
--
|
|
2.39.3
|
|
|