268 lines
7.5 KiB
Diff
268 lines
7.5 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
Date: Fri, 26 Jun 2020 20:06:24 -0500
|
||
|
Subject: [PATCH] kpartx: read devices with direct IO
|
||
|
|
||
|
If kpartx is used on top of shared storage, and a device has its
|
||
|
partition table changed on one machine, and then kpartx is run on
|
||
|
another, it may not see the new data, because the cache still contains
|
||
|
the old data, and there is nothing to tell the machine running kpartx to
|
||
|
invalidate it. To solve this, kpartx should read the devices using
|
||
|
direct io.
|
||
|
|
||
|
One issue with how this code has been updated is that the original code
|
||
|
for getblock() always read 1024 bytes. The new code reads a logical
|
||
|
sector size chunk of the device, and returns a pointer to the 512 byte
|
||
|
sector that the caller asked for, within that (possibly larger) chunk.
|
||
|
This means that if the logical sector size is 512, then the code is now
|
||
|
only reading 512 bytes. Looking through the code for the various
|
||
|
partition types, I can't see a case where more than 512 bytes is needed
|
||
|
and getblock() is used. If anyone has a reason why this code should be
|
||
|
reading 1024 bytes at minmum, I can certainly change this. But when I
|
||
|
looked, I couldn't find a case where reading 512 bytes would cause a
|
||
|
problem.
|
||
|
|
||
|
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
---
|
||
|
kpartx/dasd.c | 7 ++++---
|
||
|
kpartx/gpt.c | 22 +++++++++----------
|
||
|
kpartx/kpartx.c | 56 +++++++++++++++++++++++++++++++++++++++----------
|
||
|
kpartx/kpartx.h | 2 ++
|
||
|
4 files changed, 61 insertions(+), 26 deletions(-)
|
||
|
|
||
|
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
|
||
|
index 14b9d3aa..f0398645 100644
|
||
|
--- a/kpartx/dasd.c
|
||
|
+++ b/kpartx/dasd.c
|
||
|
@@ -22,6 +22,7 @@
|
||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
+#define _GNU_SOURCE
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <unistd.h>
|
||
|
@@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
|
||
|
|
||
|
sprintf(pathname, "/dev/.kpartx-node-%u-%u",
|
||
|
(unsigned int)major(dev), (unsigned int)minor(dev));
|
||
|
- if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
|
||
|
+ if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == -1) {
|
||
|
/* Devicenode does not exist. Try to create one */
|
||
|
if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
|
||
|
/* Couldn't create a device node */
|
||
|
return -1;
|
||
|
}
|
||
|
- fd_dasd = open(pathname, O_RDONLY);
|
||
|
+ fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
|
||
|
/*
|
||
|
* The file will vanish when the last process (we)
|
||
|
* has ceased to access it.
|
||
|
@@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
|
||
|
* Get volume label, extract name and type.
|
||
|
*/
|
||
|
|
||
|
- if (!(data = (unsigned char *)malloc(blocksize)))
|
||
|
+ if (aligned_malloc((void **)&data, blocksize, NULL))
|
||
|
goto out;
|
||
|
|
||
|
|
||
|
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
|
||
|
index 785b34ea..f7fefb70 100644
|
||
|
--- a/kpartx/gpt.c
|
||
|
+++ b/kpartx/gpt.c
|
||
|
@@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
|
||
|
|
||
|
if (!count) return NULL;
|
||
|
|
||
|
- pte = (gpt_entry *)malloc(count);
|
||
|
- if (!pte)
|
||
|
+ if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
|
||
|
return NULL;
|
||
|
memset(pte, 0, count);
|
||
|
|
||
|
@@ -269,12 +268,11 @@ static gpt_header *
|
||
|
alloc_read_gpt_header(int fd, uint64_t lba)
|
||
|
{
|
||
|
gpt_header *gpt;
|
||
|
- gpt = (gpt_header *)
|
||
|
- malloc(sizeof (gpt_header));
|
||
|
- if (!gpt)
|
||
|
+ size_t size = sizeof (gpt_header);
|
||
|
+ if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
|
||
|
return NULL;
|
||
|
- memset(gpt, 0, sizeof (*gpt));
|
||
|
- if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
|
||
|
+ memset(gpt, 0, size);
|
||
|
+ if (!read_lba(fd, lba, gpt, size)) {
|
||
|
free(gpt);
|
||
|
return NULL;
|
||
|
}
|
||
|
@@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
|
||
|
gpt_header *pgpt = NULL, *agpt = NULL;
|
||
|
gpt_entry *pptes = NULL, *aptes = NULL;
|
||
|
legacy_mbr *legacymbr = NULL;
|
||
|
+ size_t size = sizeof(legacy_mbr);
|
||
|
uint64_t lastlba;
|
||
|
if (!gpt || !ptes)
|
||
|
return 0;
|
||
|
@@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
|
||
|
}
|
||
|
|
||
|
/* This will be added to the EFI Spec. per Intel after v1.02. */
|
||
|
- legacymbr = malloc(sizeof (*legacymbr));
|
||
|
- if (legacymbr) {
|
||
|
- memset(legacymbr, 0, sizeof (*legacymbr));
|
||
|
- read_lba(fd, 0, (uint8_t *) legacymbr,
|
||
|
- sizeof (*legacymbr));
|
||
|
+ if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
|
||
|
+ &size) == 0) {
|
||
|
+ memset(legacymbr, 0, size);
|
||
|
+ read_lba(fd, 0, (uint8_t *) legacymbr, size);
|
||
|
good_pmbr = is_pmbr_valid(legacymbr);
|
||
|
free(legacymbr);
|
||
|
legacymbr=NULL;
|
||
|
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
|
||
|
index d3620c5c..c24ad6d9 100644
|
||
|
--- a/kpartx/kpartx.c
|
||
|
+++ b/kpartx/kpartx.c
|
||
|
@@ -19,6 +19,7 @@
|
||
|
* cva, 2002-10-26
|
||
|
*/
|
||
|
|
||
|
+#define _GNU_SOURCE
|
||
|
#include <stdio.h>
|
||
|
#include <fcntl.h>
|
||
|
#include <errno.h>
|
||
|
@@ -41,7 +42,6 @@
|
||
|
|
||
|
#define SIZE(a) (sizeof(a)/sizeof((a)[0]))
|
||
|
|
||
|
-#define READ_SIZE 1024
|
||
|
#define MAXTYPES 64
|
||
|
#define MAXSLICES 256
|
||
|
#define DM_TARGET "linear"
|
||
|
@@ -388,7 +388,7 @@ main(int argc, char **argv){
|
||
|
set_delimiter(mapname, delim);
|
||
|
}
|
||
|
|
||
|
- fd = open(device, O_RDONLY);
|
||
|
+ fd = open(device, O_RDONLY | O_DIRECT);
|
||
|
|
||
|
if (fd == -1) {
|
||
|
perror(device);
|
||
|
@@ -690,9 +690,9 @@ xmalloc (size_t size) {
|
||
|
*/
|
||
|
|
||
|
static int
|
||
|
-sseek(int fd, unsigned int secnr) {
|
||
|
+sseek(int fd, unsigned int secnr, int secsz) {
|
||
|
off64_t in, out;
|
||
|
- in = ((off64_t) secnr << 9);
|
||
|
+ in = ((off64_t) secnr * secsz);
|
||
|
out = 1;
|
||
|
|
||
|
if ((out = lseek64(fd, in, SEEK_SET)) != in)
|
||
|
@@ -703,6 +703,31 @@ sseek(int fd, unsigned int secnr) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+int
|
||
|
+aligned_malloc(void **mem_p, size_t align, size_t *size_p)
|
||
|
+{
|
||
|
+ static size_t pgsize = 0;
|
||
|
+ size_t size;
|
||
|
+ int err;
|
||
|
+
|
||
|
+ if (!mem_p || !align || (size_p && !*size_p))
|
||
|
+ return EINVAL;
|
||
|
+
|
||
|
+ if (!pgsize)
|
||
|
+ pgsize = getpagesize();
|
||
|
+
|
||
|
+ if (size_p)
|
||
|
+ size = ((*size_p + align - 1) / align) * align;
|
||
|
+ else
|
||
|
+ size = pgsize;
|
||
|
+
|
||
|
+ err = posix_memalign(mem_p, pgsize, size);
|
||
|
+ if (!err && size_p)
|
||
|
+ *size_p = size;
|
||
|
+ return err;
|
||
|
+}
|
||
|
+
|
||
|
+/* always in sector size blocks */
|
||
|
static
|
||
|
struct block {
|
||
|
unsigned int secnr;
|
||
|
@@ -710,30 +735,39 @@ struct block {
|
||
|
struct block *next;
|
||
|
} *blockhead;
|
||
|
|
||
|
+/* blknr is always in 512 byte blocks */
|
||
|
char *
|
||
|
-getblock (int fd, unsigned int secnr) {
|
||
|
+getblock (int fd, unsigned int blknr) {
|
||
|
+ unsigned int secsz = get_sector_size(fd);
|
||
|
+ unsigned int blks_per_sec = secsz / 512;
|
||
|
+ unsigned int secnr = blknr / blks_per_sec;
|
||
|
+ unsigned int blk_off = (blknr % blks_per_sec) * 512;
|
||
|
struct block *bp;
|
||
|
|
||
|
for (bp = blockhead; bp; bp = bp->next)
|
||
|
|
||
|
if (bp->secnr == secnr)
|
||
|
- return bp->block;
|
||
|
+ return bp->block + blk_off;
|
||
|
|
||
|
- if (sseek(fd, secnr))
|
||
|
+ if (sseek(fd, secnr, secsz))
|
||
|
return NULL;
|
||
|
|
||
|
bp = xmalloc(sizeof(struct block));
|
||
|
bp->secnr = secnr;
|
||
|
bp->next = blockhead;
|
||
|
blockhead = bp;
|
||
|
- bp->block = (char *) xmalloc(READ_SIZE);
|
||
|
+ if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
|
||
|
+ fprintf(stderr, "aligned_malloc failed\n");
|
||
|
+ exit(1);
|
||
|
+ }
|
||
|
|
||
|
- if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
|
||
|
+ if (read(fd, bp->block, secsz) != secsz) {
|
||
|
fprintf(stderr, "read error, sector %d\n", secnr);
|
||
|
- bp->block = NULL;
|
||
|
+ blockhead = bp->next;
|
||
|
+ return NULL;
|
||
|
}
|
||
|
|
||
|
- return bp->block;
|
||
|
+ return bp->block + blk_off;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
|
||
|
index 67edeb82..727632c1 100644
|
||
|
--- a/kpartx/kpartx.h
|
||
|
+++ b/kpartx/kpartx.h
|
||
|
@@ -1,6 +1,7 @@
|
||
|
#ifndef _KPARTX_H
|
||
|
#define _KPARTX_H
|
||
|
|
||
|
+#include <stddef.h>
|
||
|
#include <stdint.h>
|
||
|
#include <sys/ioctl.h>
|
||
|
|
||
|
@@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
|
||
|
extern ptreader read_sun_pt;
|
||
|
extern ptreader read_ps3_pt;
|
||
|
|
||
|
+int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
|
||
|
char *getblock(int fd, unsigned int secnr);
|
||
|
|
||
|
static inline unsigned int
|
||
|
--
|
||
|
2.17.2
|
||
|
|