130 lines
5.3 KiB
Diff
130 lines
5.3 KiB
Diff
|
From 790cebc2a1d8de8d93b2a2a0ef19e31c767f4f1c Mon Sep 17 00:00:00 2001
|
||
|
From: Max Reitz <mreitz@redhat.com>
|
||
|
Date: Mon, 9 Sep 2019 07:38:20 +0100
|
||
|
Subject: [PATCH 4/6] file-posix: Handle undetectable alignment
|
||
|
|
||
|
RH-Author: Max Reitz <mreitz@redhat.com>
|
||
|
Message-id: <20190909073822.26191-2-mreitz@redhat.com>
|
||
|
Patchwork-id: 90332
|
||
|
O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] file-posix: Handle undetectable alignment
|
||
|
Bugzilla: 1749134
|
||
|
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||
|
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
|
||
|
From: Nir Soffer <nirsof@gmail.com>
|
||
|
|
||
|
In some cases buf_align or request_alignment cannot be detected:
|
||
|
|
||
|
1. With Gluster, buf_align cannot be detected since the actual I/O is
|
||
|
done on Gluster server, and qemu buffer alignment does not matter.
|
||
|
Since we don't have alignment requirement, buf_align=1 is the best
|
||
|
value.
|
||
|
|
||
|
2. With local XFS filesystem, buf_align cannot be detected if reading
|
||
|
from unallocated area. In this we must align the buffer, but we don't
|
||
|
know what is the correct size. Using the wrong alignment results in
|
||
|
I/O error.
|
||
|
|
||
|
3. With Gluster backed by XFS, request_alignment cannot be detected if
|
||
|
reading from unallocated area. In this case we need to use the
|
||
|
correct alignment, and failing to do so results in I/O errors.
|
||
|
|
||
|
4. With NFS, the server does not use direct I/O, so both buf_align cannot
|
||
|
be detected. In this case we don't need any alignment so we can use
|
||
|
buf_align=1 and request_alignment=1.
|
||
|
|
||
|
These cases seems to work when storage sector size is 512 bytes, because
|
||
|
the current code starts checking align=512. If the check succeeds
|
||
|
because alignment cannot be detected we use 512. But this does not work
|
||
|
for storage with 4k sector size.
|
||
|
|
||
|
To determine if we can detect the alignment, we probe first with
|
||
|
align=1. If probing succeeds, maybe there are no alignment requirement
|
||
|
(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we
|
||
|
don't have any way to tell, we treat this as undetectable alignment. If
|
||
|
probing with align=1 fails with EINVAL, but probing with one of the
|
||
|
expected alignments succeeds, we know that we found a working alignment.
|
||
|
|
||
|
Practically the alignment requirements are the same for buffer
|
||
|
alignment, buffer length, and offset in file. So in case we cannot
|
||
|
detect buf_align, we can use request alignment. If we cannot detect
|
||
|
request alignment, we can fallback to a safe value. To use this logic,
|
||
|
we probe first request alignment instead of buf_align.
|
||
|
|
||
|
Here is a table showing the behaviour with current code (the value in
|
||
|
parenthesis is the optimal value).
|
||
|
|
||
|
Case Sector buf_align (opt) request_alignment (opt) result
|
||
|
|
||
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
||
|
---
|
||
|
block/file-posix.c | 36 +++++++++++++++++++++++++-----------
|
||
|
1 file changed, 25 insertions(+), 11 deletions(-)
|
||
|
|
||
|
diff --git a/block/file-posix.c b/block/file-posix.c
|
||
|
index 4479cc7..b8b4dad 100644
|
||
|
--- a/block/file-posix.c
|
||
|
+++ b/block/file-posix.c
|
||
|
@@ -323,6 +323,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
||
|
BDRVRawState *s = bs->opaque;
|
||
|
char *buf;
|
||
|
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
|
||
|
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
|
||
|
|
||
|
/* For SCSI generic devices the alignment is not really used.
|
||
|
With buffered I/O, we don't have any restrictions. */
|
||
|
@@ -349,25 +350,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
- /* If we could not get the sizes so far, we can only guess them */
|
||
|
- if (!s->buf_align) {
|
||
|
+ /*
|
||
|
+ * If we could not get the sizes so far, we can only guess them. First try
|
||
|
+ * to detect request alignment, since it is more likely to succeed. Then
|
||
|
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
|
||
|
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
|
||
|
+ * request_alignment.
|
||
|
+ */
|
||
|
+
|
||
|
+ if (!bs->bl.request_alignment) {
|
||
|
+ int i;
|
||
|
size_t align;
|
||
|
- buf = qemu_memalign(max_align, 2 * max_align);
|
||
|
- for (align = 512; align <= max_align; align <<= 1) {
|
||
|
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
||
|
- s->buf_align = align;
|
||
|
+ buf = qemu_memalign(max_align, max_align);
|
||
|
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
||
|
+ align = alignments[i];
|
||
|
+ if (raw_is_io_aligned(fd, buf, align)) {
|
||
|
+ /* Fallback to safe value. */
|
||
|
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
qemu_vfree(buf);
|
||
|
}
|
||
|
|
||
|
- if (!bs->bl.request_alignment) {
|
||
|
+ if (!s->buf_align) {
|
||
|
+ int i;
|
||
|
size_t align;
|
||
|
- buf = qemu_memalign(s->buf_align, max_align);
|
||
|
- for (align = 512; align <= max_align; align <<= 1) {
|
||
|
- if (raw_is_io_aligned(fd, buf, align)) {
|
||
|
- bs->bl.request_alignment = align;
|
||
|
+ buf = qemu_memalign(max_align, 2 * max_align);
|
||
|
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
||
|
+ align = alignments[i];
|
||
|
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
||
|
+ /* Fallback to request_aligment. */
|
||
|
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|