Additional patches for 9.7.0 lvm2
Resolves: RHEL-94577 RHEL-67039
parent dc84338e5c
commit d36e410a4d

0008-lvmlockd-fix-hosts-check-for-vgremove.patch | 143 lines | Normal file

@@ -0,0 +1,143 @@
From 54d5564f0fc375278c940635c449bdeb24334b71 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 7 May 2025 17:51:01 -0500
Subject: [PATCH 08/14] lvmlockd: fix hosts check for vgremove

errors from lock manager were not being considered.
EAGAIN from sanlock should be considered EBUSY.

(cherry picked from commit 53752ef851d3210e52297ebb4744fdd766c060c6)
---
 daemons/lvmlockd/lvmlockd-core.c | 11 ++++++++---
 daemons/lvmlockd/lvmlockd-dlm.c | 8 ++++++++
 daemons/lvmlockd/lvmlockd-sanlock.c | 26 ++++++++++++++++++++++----
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c
index c65a3cc39..e4aa91216 100644
--- a/daemons/lvmlockd/lvmlockd-core.c
+++ b/daemons/lvmlockd/lvmlockd-core.c
@@ -2708,13 +2708,16 @@ static void *lockspace_thread_main(void *arg_in)
 rv = lm_hosts(ls, 1);
 if (rv) {
 /*
+ * rv < 0: error (don't remove)
+ * rv > 0: other hosts in lockspace (cannot remove)
+ * rv = 0: only local host in lockspace (can remove)
 * Checking for hosts here in addition to after the
 * main loop allows vgremove to fail and be rerun
 * after the ls is stopped on other hosts.
 */
 log_error("S %s lockspace hosts %d", ls->name, rv);
 list_del(&act->list);
- act->result = -EBUSY;
+ act->result = (rv < 0) ? rv : -EBUSY;
 add_client_result(act);
 continue;
 }
@@ -2727,7 +2730,9 @@ static void *lockspace_thread_main(void *arg_in)
 if (act->op == LD_OP_BUSY && act->rt == LD_RT_VG) {
 log_debug("S %s checking if lockspace is busy", ls->name);
 rv = lm_hosts(ls, 0);
- if (rv)
+ if (rv < 0)
+ act->result = rv;
+ else if (rv)
 act->result = -EBUSY;
 else
 act->result = 0;
@@ -2743,7 +2748,7 @@ static void *lockspace_thread_main(void *arg_in)
 if (rv) {
 log_error("S %s lockspace hosts %d", ls->name, rv);
 list_del(&act->list);
- act->result = -EBUSY;
+ act->result = (rv < 0) ? rv : -EBUSY;
 add_client_result(act);
 continue;
 }
diff --git a/daemons/lvmlockd/lvmlockd-dlm.c b/daemons/lvmlockd/lvmlockd-dlm.c
index 72b139170..7529ad327 100644
--- a/daemons/lvmlockd/lvmlockd-dlm.c
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -799,6 +799,14 @@ int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
 * the stale lockspaces on the others eventually.)
 */

+/*
+ * On error, returns < 0
+ *
+ * On success:
+ * If other hosts are found, returns the number.
+ * If no other hosts are found (only ourself), returns 0.
+ */
+
 int lm_hosts_dlm(struct lockspace *ls, int notify)
 {
 char ls_nodes_path[PATH_MAX];
diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c
index 1a3982071..d50d0ce4b 100644
--- a/daemons/lvmlockd/lvmlockd-sanlock.c
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
@@ -2296,6 +2296,13 @@ int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
 return 0;
 }

+/*
+ * On error, returns < 0
+ * Else:
+ * If other hosts are found, returns the number.
+ * If no other hosts are found (only ourself), returns 0.
+ */
+
 int lm_hosts_sanlock(struct lockspace *ls, int notify)
 {
 struct sanlk_host *hss = NULL;
@@ -2310,14 +2317,25 @@ int lm_hosts_sanlock(struct lockspace *ls, int notify)
 return 0;

 rv = sanlock_get_hosts(ls->name, 0, &hss, &hss_count, 0);
+
+ if (rv == -EAGAIN) {
+ /*
+ * No host info is available yet (perhaps lockspace was
+ * just started so other host state is unknown.) Pretend
+ * there is one other host (busy).
+ */
+ log_debug("S %s hosts_san no info, retry later", ls->name);
+ return 1;
+ }
+
 if (rv < 0) {
 log_error("S %s hosts_san get_hosts error %d", ls->name, rv);
- return 0;
+ return -1;
 }

 if (!hss || !hss_count) {
 log_error("S %s hosts_san zero hosts", ls->name);
- return 0;
+ return -1;
 }

 hs = hss;
@@ -2336,7 +2354,7 @@ int lm_hosts_sanlock(struct lockspace *ls, int notify)
 }

 state = hs->flags & SANLK_HOST_MASK;
- if (state == SANLK_HOST_LIVE)
+ if ((state == SANLK_HOST_LIVE) || (state == SANLK_HOST_UNKNOWN))
 found_others++;
 hs++;
 }
@@ -2358,7 +2376,7 @@ int lm_hosts_sanlock(struct lockspace *ls, int notify)

 if (!found_self) {
 log_error("S %s hosts_san self not found others %d", ls->name, found_others);
- return 0;
+ return -1;
 }

 return found_others;
--
2.49.0
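
The hunks above all hinge on one return convention for lm_hosts(): negative is a lock-manager error, zero means only the local host is in the lockspace, positive is the number of other hosts. A minimal standalone sketch of how a vgremove-style caller maps that onto its result (check_hosts() and removal_result() are hypothetical stand-ins, not lvmlockd code):

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for lm_hosts(): <0 = lock manager error,
 * 0 = only the local host is in the lockspace, >0 = count of other hosts. */
static int check_hosts(int simulated_rv)
{
	return simulated_rv;
}

/* Map the host check onto the result a vgremove-style caller reports,
 * mirroring the "(rv < 0) ? rv : -EBUSY" change in the patch above. */
static int removal_result(int rv)
{
	if (rv < 0)
		return rv;      /* propagate the error instead of masking it as busy */
	if (rv > 0)
		return -EBUSY;  /* other hosts still use the lockspace */
	return 0;               /* safe to remove the VG */
}

int main(void)
{
	const int samples[] = { -EIO, 0, 2 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int rv = check_hosts(samples[i]);
		printf("lm_hosts rv=%d -> result=%d\n", rv, removal_result(rv));
	}
	return 0;
}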

0009-lvmlockd-fix-sanlock_release-for-vgremove.patch | 38 lines | Normal file

@@ -0,0 +1,38 @@
From 0b375ab5f49c4eea0c705ef8e27e94a366b0f6d6 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 8 May 2025 10:54:50 -0500
Subject: [PATCH 09/14] lvmlockd: fix sanlock_release for vgremove

incorrect data was being copied to lease structs passed
to sanlock_release(), making the lease removal fail.

(cherry picked from commit 88910c200f9e24a454dc9fcbf39c2df1d4887f3b)
---
 daemons/lvmlockd/lvmlockd-sanlock.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c
index d50d0ce4b..b17cbad7e 100644
--- a/daemons/lvmlockd/lvmlockd-sanlock.c
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
@@ -2192,12 +2192,15 @@ static int release_rename(struct lockspace *ls, struct resource *r)
 if (!res_args)
 return -ENOMEM;

- memcpy(&rd1, rds, sizeof(struct rd_sanlock));
- memcpy(&rd2, rds, sizeof(struct rd_sanlock));
+ memcpy(&rd1, &rds->rs, sizeof(struct rd_sanlock));
+ memcpy(&rd2, &rds->rs, sizeof(struct rd_sanlock));

 res1 = (struct sanlk_resource *)&rd1;
 res2 = (struct sanlk_resource *)&rd2;

+ if (memcmp(res1->name, r->name, SANLK_NAME_LEN))
+ log_error("%s:%s unlock_san release rename bad name %.48s", ls->name, r->name, res1->name);
+
 strcpy_name_len(res2->name, "invalid_removed", SANLK_NAME_LEN);

 res_args[0] = res1;
--
2.49.0
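
The defect fixed here is a copy taken from the wrong place, so the sanlk_resource handed to sanlock_release() carries a garbage name; the added memcmp() is a guard that logs when the copied name no longer matches the resource being released. A hedged illustration with a made-up layout (wrapper_t, lease_t and the field order are assumptions for the sketch, not lvmlockd's real structures):

#include <stdio.h>
#include <string.h>

#define NAME_LEN 48

/* Assumed layout only: the embedded lease is deliberately not the first member,
 * so copying from the wrapper's base address grabs the wrong bytes. */
typedef struct { char name[NAME_LEN]; } lease_t;
typedef struct { unsigned long flags; lease_t rs; } wrapper_t;

int main(void)
{
	wrapper_t w = { .flags = 0xdeadbeefUL, .rs = { .name = "lvm_global" } };
	lease_t wrong, right;

	memcpy(&wrong, &w, sizeof(lease_t));      /* bug pattern: copy from the wrapper base */
	memcpy(&right, &w.rs, sizeof(lease_t));   /* fixed pattern: copy the embedded lease */

	/* The guard the patch adds: complain if the copied name doesn't match. */
	if (memcmp(wrong.name, "lvm_global", NAME_LEN))
		printf("wrong copy: name mismatch, release would fail\n");
	if (!memcmp(right.name, "lvm_global", NAME_LEN))
		printf("right copy: name matches\n");
	return 0;
}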

0010-raid-count-or-clear-transiently-failed-devices.patch | 460 lines | Normal file

@@ -0,0 +1,460 @@
From ad632cf9f19f3b8efbcf087cbf60527160ba5fec Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen <heinzm@redhat.com>
Date: Tue, 5 Nov 2024 18:33:19 +0100
Subject: [PATCH 10/14] raid: count or clear transiently failed devices

Count or clear transiently failed devices as of dm-raid superblocks.
Updated debuging.
Use lvconvert --repair to repair transiently failed legs.
Activating all 'meta' LVs with single sync_local_dev_names().
Using proper DM path for meta LV.

Modified-by: zkabelac@redhat.com
(cherry picked from commit 03d8661657bb3d1cb5dd764f3a450a8211f892e6)
---
 device_mapper/Makefile | 1 +
 device_mapper/all.h | 1 +
 device_mapper/raid/raid_parser.c | 164 ++++++++++++++++++++++++++++
 device_mapper/raid/target.h | 23 ++++
 lib/activate/activate.h | 2 +
 lib/metadata/raid_manip.c | 179 ++++++++++++++++++++++++++++++-
 6 files changed, 369 insertions(+), 1 deletion(-)
 create mode 100644 device_mapper/raid/raid_parser.c
 create mode 100644 device_mapper/raid/target.h

diff --git a/device_mapper/Makefile b/device_mapper/Makefile
index b1aa53c36..4dfcd4f12 100644
--- a/device_mapper/Makefile
+++ b/device_mapper/Makefile
@@ -25,6 +25,7 @@ DEVICE_MAPPER_SOURCE=\
 device_mapper/libdm-targets.c \
 device_mapper/libdm-timestamp.c \
 device_mapper/mm/pool.c \
+ device_mapper/raid/raid_parser.c \
 device_mapper/regex/matcher.c \
 device_mapper/regex/parse_rx.c \
 device_mapper/regex/ttree.c \
diff --git a/device_mapper/all.h b/device_mapper/all.h
index 91c085e76..97279c10b 100644
--- a/device_mapper/all.h
+++ b/device_mapper/all.h
@@ -19,6 +19,7 @@

 #include "base/data-struct/list.h"
 #include "base/data-struct/hash.h"
+#include "raid/target.h"
 #include "vdo/target.h"

 #include <inttypes.h>
diff --git a/device_mapper/raid/raid_parser.c b/device_mapper/raid/raid_parser.c
new file mode 100644
index 000000000..adef7bb6c
--- /dev/null
+++ b/device_mapper/raid/raid_parser.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2024 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Support counting number of failed device bits in dm-raid superblock bit arrays or clear them out.
+ */
+
+#include "device_mapper/misc/dmlib.h"
+#include "device_mapper/all.h"
+#include "device_mapper/raid/target.h"
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Copied/derived from kernel's drivers/md/dm-raid.c so this is prone to out-of-sync (factor out to header file?). */
+#define MAX_RAID_DEVICES 253 /* md-raid kernel limit? */
+#define UINT64_BITS (sizeof(uint64_t) * 8)
+#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (UINT64_BITS - 1)) / UINT64_BITS)
+#define DM_RAID_SB_MAGIC 0x446D5264 /* "DmRd" */
+#define FEATURE_FLAG_SUPPORTS_V190 0x1 /* Supports extended superblock */
+
+/* RAID superblock at beginning of rmeta SubLVs trimmed down to mandatory members. */
+struct dm_raid_superblock {
+ __le32 magic; /* "DmRd" */
+ __le32 compat_features; /* Used to indicate compatible features (like 1.9.0 ondisk metadata extension) */
+ __le32 dummy[4];
+ __le64 failed_devices; /* Pre 1.9.0 part of bit field of devices to */
+ /* indicate device failures (see extension below) */
+ __le32 dummy1[7];
+
+ /********************************************************************
+ * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+ *
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+ */
+ __le32 flags; /* Flags defining array states for reshaping */
+ __le32 dummy2[14];
+ __le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1];
+
+ __le32 dummy3;
+ /* Always set rest up to logical block size to 0 when writing ... */
+} __packed;
+/* END: Copied from ... */
+
+/* Superblock I/O buffer size to be able to Cope with 4K native devices... */
+#define SB_BUFSZ 4096
+
+static size_t _get_sb_size(const struct dm_raid_superblock *sb)
+{
+ return (FEATURE_FLAG_SUPPORTS_V190 & le32toh(sb->compat_features)) ?
+ sizeof(*sb) : ((char *) &sb->flags - (char *) sb);
+}
+
+static uint32_t _hweight64(__le64 v)
+{
+ uint32_t r = 0;
+
+ while (v) {
+ r += v & 1;
+ v >>= 1;
+ }
+
+ return r;
+}
+
+static uint32_t _hweight_failed(struct dm_raid_superblock *sb)
+{
+ uint32_t r = _hweight64(sb->failed_devices);
+
+ if (_get_sb_size(sb) == sizeof(*sb)) {
+ size_t i = DM_ARRAY_SIZE(sb->extended_failed_devices);
+
+ while (i--)
+ r = max(r, _hweight64(sb->extended_failed_devices[i]));
+ }
+
+ return r;
+}
+
+static void _clear_failed_devices(struct dm_raid_superblock *sb)
+{
+
+ sb->failed_devices = 0;
+
+ if (_get_sb_size(sb) == sizeof(*sb))
+ memset(sb->extended_failed_devices, 0, sizeof(sb->extended_failed_devices));
+}
+
+static int _count_or_clear_failed_devices(const char *dev_path, bool clear, uint32_t *nr_failed)
+{
+ struct dm_raid_superblock *sb = NULL;
+ size_t sz;
+ int fd, r = 0;
+
+ if (posix_memalign((void *) &sb, SB_BUFSZ, SB_BUFSZ)) {
+ log_sys_error("Failed to allocate RAID superblock buffer", dev_path);
+ return 0;
+ }
+
+ fd = open(dev_path, O_EXCL | ((clear) ? O_RDWR : O_RDONLY) | O_DIRECT);
+ if (fd < 0) {
+ log_sys_error("Failed to open RAID metadata volume", dev_path);
+ goto out;
+ }
+
+ if (read(fd, sb, SB_BUFSZ) != SB_BUFSZ) {
+ log_sys_error("Failed to read RAID metadata volume", dev_path);
+ goto out;
+ }
+
+ /* FIXME: big endian??? */
+ if (sb->magic != htobe32(DM_RAID_SB_MAGIC)) {
+ log_error("No RAID signature on %s.", dev_path);
+ goto out;
+ }
+
+ if (nr_failed)
+ *nr_failed = _hweight_failed(sb);
+
+ if (clear) {
+ if (lseek(fd, 0, SEEK_SET) < 0) {
+ log_sys_error("Failed to seek RAID metadata volume", dev_path);
+ goto out;
+ }
+
+ sz = _get_sb_size(sb);
+ memset((void *)((char *) sb + sz), 0, SB_BUFSZ - sz);
+ _clear_failed_devices(sb);
+ if (write(fd, sb, SB_BUFSZ) != SB_BUFSZ) {
+ log_sys_error("Failed to clear RAID metadata volume", dev_path);
+ goto out;
+ }
+ }
+
+ r = 1;
+
+out:
+ if ((fd >= 0) && close(fd))
+ log_sys_debug("close", dev_path);
+
+ free(sb);
+
+ return r;
+}
+
+int dm_raid_count_failed_devices(const char *dev_path, uint32_t *nr_failed)
+{
+ return _count_or_clear_failed_devices(dev_path, false, nr_failed);
+}
+
+int dm_raid_clear_failed_devices(const char *dev_path, uint32_t *nr_failed)
+{
+ return _count_or_clear_failed_devices(dev_path, true, nr_failed);
+}
diff --git a/device_mapper/raid/target.h b/device_mapper/raid/target.h
new file mode 100644
index 000000000..3e3ec024c
--- /dev/null
+++ b/device_mapper/raid/target.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2024 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef DEVICE_MAPPER_RAID_TARGET_H
+#define DEVICE_MAPPER_RAID_TARGET_H
+
+#include <stdint.h>
+
+int dm_raid_count_failed_devices(const char *dev_path, uint32_t *nr_failed);
+int dm_raid_clear_failed_devices(const char *dev_path, uint32_t *nr_failed);
+
+#endif
diff --git a/lib/activate/activate.h b/lib/activate/activate.h
index bd30e2655..155936135 100644
--- a/lib/activate/activate.h
+++ b/lib/activate/activate.h
@@ -197,6 +197,8 @@ int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt);
 int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action);
 int lv_raid_message(const struct logical_volume *lv, const char *msg);
 int lv_raid_status(const struct logical_volume *lv, struct lv_status_raid **status);
+int lv_raid_clear_failed_devices(const struct logical_volume *lv);
+int lv_raid_count_failed_devices(const struct logical_volume *lv, uint32_t *failed_cnt);
 int lv_writecache_message(const struct logical_volume *lv, const char *msg);
 int lv_cache_status(const struct logical_volume *cache_lv,
 struct lv_status_cache **status);
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index eb4a2eb45..2a78a0bec 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -3242,7 +3242,7 @@ static int _raid_leg_degraded(struct lv_segment *raid_seg, uint32_t s)
 _sublv_is_degraded(seg_metalv(raid_seg, s))));
 }

-/* Return failed component SubLV count for @lv. */
+/* Return failed component SubLV pair count for @lv. */
 static uint32_t _lv_get_nr_failed_components(const struct logical_volume *lv)
 {
 uint32_t r = 0, s;
@@ -7328,6 +7328,183 @@ int lv_raid_remove_missing(struct logical_volume *lv)
 return 1;
 }

+/*
+ * Count number of failed device bits in dm-raid superblock bit arrays -or- clear them out.
+ *
+ * If any failed devices, return != 0 maximum of failed SubLVs and parity_devs so that the
+ * caller will ask to clear and try activation of the RaidLV unless more than parity_devs
+ * component device pairs (rmeta and rimage) are still failed. This'll allow early exit
+ * in the caller preventing MD kernel rejection to activate the RAID array with > parity_devs
+ * failed component device pairs.
+ */
+static int _count_or_clear_failed_devices_bits(struct logical_volume *meta_lv,
+ bool clear, uint32_t *nr_failed)
+{
+ char *meta_path = lv_dmpath_dup(meta_lv->vg->cmd->mem, meta_lv);
+
+ if (!meta_path) {
+ log_error("Failed to build device path for %s.",
+ display_lvname(meta_lv));
+ return 0;
+ }
+
+ if (!clear) /* only counting */
+ return dm_raid_count_failed_devices(meta_path, nr_failed);
+
+ return dm_raid_clear_failed_devices(meta_path, nr_failed);
+}
+
+/* Count or clear failed devices bits in RAID superblocks for
+ * recurred transiently failed component SubLV pairs. */
+static int _raid_count_or_clear_failed_devices(const struct logical_volume *lv,
+ bool clear, uint32_t *failed_devices)
+{
+ uint32_t nr_failed = 0, nr_failed_tmp = 0, failed_sublvs = 0, s;
+ struct lv_segment *raid_seg = first_seg(lv);
+ struct logical_volume *meta_lv;
+ const char *str;
+ int r = 1, cleared_devs = 0;
+
+ /* Prevent bogus use. */
+ if (!seg_is_raid_with_meta(raid_seg)) {
+ log_error("%s is not a RaidLV with metadata.", display_lvname(lv));
+ return 0;
+ }
+
+ failed_sublvs = _lv_get_nr_failed_components(lv);
+
+ if (clear && (failed_sublvs > raid_seg->segtype->parity_devs)) {
+ log_error("Can't clear transiently failed devices on still failed %s.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!raid_seg->meta_areas) {
+ log_error(INTERNAL_ERROR "Missing metadata areas on %s!", display_lvname(lv));
+ return 0;
+ }
+
+ /* Check if there isn't any meta LV already active */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ if (_raid_leg_degraded(raid_seg, s))
+ continue;
+
+ meta_lv = seg_metalv(raid_seg, s);
+
+ if (lv_is_active(meta_lv)) {
+ /* DM table is in some unknown condition, aborting... */
+ log_error("Can't %s failed devices with active %s metadata volume %s.",
+ clear ? "clear" : "count",
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ return 0;
+ }
+ }
+
+ /* Activate all non degraded meta LVs before count or clear */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ meta_lv = seg_metalv(raid_seg, s);
+
+ if (_raid_leg_degraded(raid_seg, s)) {
+ log_debug("Skipping activation of failed devices for degraded %s metadata volume %s.",
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ continue;
+ }
+
+ if (!activate_lv(lv->vg->cmd, meta_lv)) {
+ log_error("Failed to activate %s metadata volume %s.",
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ r = 0; /* how many can be counted... */
+ }
+ }
+
+ /* Wait for meta activation. */
+ if (!sync_local_dev_names(lv->vg->cmd))
+ stack;
+
+ for (s = 0; s < raid_seg->area_count; s++) {
+ meta_lv = seg_metalv(raid_seg, s);
+
+ if (_raid_leg_degraded(raid_seg, s)) {
+ if (clear)
+ log_debug("Skipping clear of failed devices for degraded %s metadata volume %s.",
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ continue;
+ }
+
+ if (lv_is_active(meta_lv) &&
+ !_count_or_clear_failed_devices_bits(meta_lv, clear,
+ &nr_failed_tmp)) {
+ log_error("Failed to %s failed device(s) in superblock of %s metadata volume %s.",
+ clear ? "clear" : "count",
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ r = 0;
+ continue;
+ }
+
+ if (nr_failed_tmp) {
+ log_verbose("%s %u failed device(s) in superblock of %s metadata volume %s.",
+ clear ? "Cleared" : "Counted", nr_failed_tmp,
+ lvseg_name(raid_seg), display_lvname(meta_lv));
+ cleared_devs++;
+ }
+
+ if (nr_failed_tmp > nr_failed)
+ nr_failed = nr_failed_tmp;
+ }
+
+ /* Deactivate meta LVs */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ if (_raid_leg_degraded(raid_seg, s))
+ continue;
+
+ if (!deactivate_lv(lv->vg->cmd, seg_metalv(raid_seg, s))) {
+ stack;
+ r = 0;
+ }
+ }
+
+ if (clear) {
+ if (!failed_sublvs)
+ str = "fully operational";
+ else if (failed_sublvs <= raid_seg->segtype->parity_devs)
+ str = "degraded";
+ else
+ str = "still failed";
+
+ log_print_unless_silent("The %s volume %s is %s with %u transiently failed device(s).",
+ lvseg_name(raid_seg), display_lvname(lv), str,
+ nr_failed - failed_sublvs);
+
+ if (r && cleared_devs &&
+ (failed_sublvs <= raid_seg->segtype->parity_devs))
+ /* TODO: maybe we want to activate RAID volume here ? */
+ log_print_unless_silent("Volume has been restored after clearing %u superblocks(s). Once online please check its content.",
+ cleared_devs);
+ }
+
+ if (failed_devices)
+ *failed_devices = max(failed_sublvs, raid_seg->segtype->parity_devs);
+
+ return r;
+}
+
+/* Clear failed device bits in RAID superblocks for recurred
+ * transiently failed component SubLV pairs. */
+int lv_raid_clear_failed_devices(const struct logical_volume *lv)
+{
+ return _raid_count_or_clear_failed_devices(lv, true, NULL);
+}
+
+/* Count failed device bits in RAID superblocks for recurred
+ * transiently failed component SubLV pairs.
+ *
+ * On success, @failed_cnt contains the current number.
+ */
+int lv_raid_count_failed_devices(const struct logical_volume *lv, uint32_t *failed_cnt)
+{
+ return _raid_count_or_clear_failed_devices(lv, false, failed_cnt);
+}
+
 /* Return 1 if a partial raid LV can be activated redundantly */
 static int _partial_raid_lv_is_redundant(const struct logical_volume *lv)
 {
--
2.49.0
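
For readers skimming the parser above, the counting step is just a population count over the superblock's failed-device bitmaps, with the v1.9.0 extension words folded in via a maximum. A small self-contained sketch of that arithmetic (the sample masks and the popcount64 name are illustrative, not part of lvm2):

#include <stdint.h>
#include <stdio.h>

/* Count set bits in one 64-bit failed-device mask (same loop shape as the patch's _hweight64). */
static uint32_t popcount64(uint64_t v)
{
	uint32_t r = 0;

	while (v) {
		r += v & 1;
		v >>= 1;
	}
	return r;
}

int main(void)
{
	/* Sample masks only: bit N set means raid device N is marked failed. */
	uint64_t failed_devices = 0x6;             /* legacy field: devices 1 and 2 failed */
	uint64_t extended[3] = { 0x1, 0x0, 0x0 };  /* v1.9.0 extension words */
	uint32_t nr = popcount64(failed_devices);
	unsigned int i;

	/* Mirror the patch: take the maximum over the extension words rather than a sum. */
	for (i = 0; i < 3; i++) {
		uint32_t w = popcount64(extended[i]);
		if (w > nr)
			nr = w;
	}
	printf("failed device bits counted: %u\n", nr);
	return 0;
}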

0011-lvconvert-allow-clearing-superblocks.patch | 64 lines | Normal file

@@ -0,0 +1,64 @@
From 4fbda3d36dc02bb682dd8579c8a4ebfc692cbfe3 Mon Sep 17 00:00:00 2001
From: Zdenek Kabelac <zkabelac@redhat.com>
Date: Fri, 31 Jan 2025 21:45:57 +0100
Subject: [PATCH 11/14] lvconvert: allow clearing superblocks

(cherry picked from commit 0a8f560c759de0f3335213896c2a3383009c7206)
---
 tools/lvconvert.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/tools/lvconvert.c b/tools/lvconvert.c
index 72ef74cef..270bae7d0 100644
--- a/tools/lvconvert.c
+++ b/tools/lvconvert.c
@@ -3874,10 +3874,44 @@ static int _lvconvert_repair_pvs_raid(struct cmd_context *cmd, struct logical_vo
 {
 struct dm_list *failed_pvs;
 int do_it;
+ uint32_t failed_cnt = 0;
+ struct lv_segment *raid_seg;

 if (!lv_is_active(lv_lock_holder(lv))) {
- log_error("%s must be active to perform this operation.", display_lvname(lv));
- return 0;
+ if (!lv_raid_count_failed_devices(lv, &failed_cnt))
+ return_0;
+
+ if (!failed_cnt) {
+ log_error("Logical volume %s must be active to perform this operation.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ raid_seg = first_seg(lv);
+
+ if (failed_cnt > raid_seg->segtype->parity_devs) {
+ log_error("Can't clear %u failed superblock(s) in %s volume %s.",
+ failed_cnt, lvseg_name(raid_seg), display_lvname(lv));
+ log_print_unless_silent("The maximum number of degraded devices allowed is %u.",
+ raid_seg->segtype->parity_devs);
+ return 0;
+ }
+
+ if (!arg_count(cmd, yes_ARG) &&
+ yes_no_prompt("Attempt to clear %u transiently failed %s superblock(s) in %s? [y/n]: ",
+ failed_cnt, lvseg_name(raid_seg), display_lvname(lv)) == 'n') {
+ log_error("Logical volume %s with %u transiently failed %s superblock(s) NOT repaired.",
+ display_lvname(lv), failed_cnt, lvseg_name(raid_seg));
+ return 0;
+ }
+
+ log_verbose("Clearing %u transiently failed %s superblock(s) in %s.",
+ failed_cnt, lvseg_name(raid_seg), display_lvname(lv));
+
+ if (!lv_raid_clear_failed_devices(lv))
+ return_0;
+
+ return 1;
 }

 lv_check_transient(lv); /* TODO check this in lib for all commands? */
--
2.49.0

0012-test-check-raid-superblock-clearing.patch | 107 lines | Normal file

@@ -0,0 +1,107 @@
From 7ef2e32ab91194abfcacb96eae2577cf19e66123 Mon Sep 17 00:00:00 2001
From: Zdenek Kabelac <zkabelac@redhat.com>
Date: Sun, 2 Feb 2025 19:21:05 +0100
Subject: [PATCH 12/14] test: check raid superblock clearing

(cherry picked from commit 0cafb18978d4a720845c2830fcff129ce9b79327)
---
 test/shell/lvconvert-repair-raid5.sh | 62 +++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/test/shell/lvconvert-repair-raid5.sh b/test/shell/lvconvert-repair-raid5.sh
index a14d90be9..66485c536 100644
--- a/test/shell/lvconvert-repair-raid5.sh
+++ b/test/shell/lvconvert-repair-raid5.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash

-# Copyright (C) 2024 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2024-2025 Red Hat, Inc. All rights reserved.
 #
 # This copyrighted material is made available to anyone wishing to use,
 # modify, copy, or redistribute it subject to the terms and conditions
@@ -24,17 +24,31 @@ SKIP_WITH_LVMPOLLD=1
 #
 aux have_raid 1 8 0 || skip

-aux prepare_vg 4
-get_devs
+aux prepare_vg 6

-#offset=$(get first_extent_sector "$dev1")

-# It's possible small raid arrays do have problems with reporting in-sync.
-# So try bigger size
-RAID_SIZE=8
+lvcreate --type raid5 -i 5 -L8 -n $lv1 $vg
+aux wait_for_sync $vg $lv1
+
+lvs -ao+devices $vg
+
+# fail 2 drives out of 4
+aux error_dev "$dev2"
+aux error_dev "$dev3"
+
+lvchange -an $vg
+
+aux enable_dev "$dev2"
+aux enable_dev "$dev3"
+
+lvconvert --yes --repair $vg/$lv1 -v
+
+lvremove -f $vg

-# RAID1 transient failure check
-lvcreate --type raid5 -i 3 -L $RAID_SIZE -n $lv1 $vg
+
+# Raid5 transient failure check
+
+lvcreate --type raid5 -i 3 -L8 -n $lv1 $vg
 aux wait_for_sync $vg $lv1

 lvs -ao+devices $vg
@@ -43,17 +57,35 @@ lvs -ao+devices $vg
 aux error_dev "$dev2"
 aux error_dev "$dev3"

+not lvconvert --yes --repair $vg/$lv1
+
 # deactivate immediately
 lvchange -an $vg

+# Raid5 cannot activate with only 2 disks
+not lvchange -ay $vg
+
+# also it cannot be repaired
+not lvconvert --yes --repair $vg/$lv1
+
+# restore 1st. failed drive
 aux enable_dev "$dev2"
+
+# Raid5 should be now repairable
+lvconvert --yes --repair $vg/$lv1
+
+# Raid5 volume is working now
+lvchange -ay $vg
+
+# again deactivate
+lvchange -an $vg
+
+# restore 2nd. missing drive
 aux enable_dev "$dev3"

-# ATM we are failing here with this kernel message:
-#
-# md/raid:mdX: Cannot continue operation (2/4 failed).
-#
-# Raid5 LV cannot be started any more
-should lvchange -ay $vg
+# still repairable
+lvconvert --yes --repair $vg/$lv1
+
+lvchange -ay $vg

 vgremove -ff $vg
--
2.49.0

0013-man-update-raid-man.patch | 50 lines | Normal file

@@ -0,0 +1,50 @@
From 63312cb3343d6370c89ab0ff4e93aa39aab25fa5 Mon Sep 17 00:00:00 2001
From: Zdenek Kabelac <zkabelac@redhat.com>
Date: Mon, 12 May 2025 15:05:46 +0200
Subject: [PATCH 13/14] man: update raid man

Mention repair of transiently lost devices.

(cherry picked from commit 22364ce9b68b3a2e03819bfd8fdc569df584b7e2)
---
 man/lvmraid.7_main | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/man/lvmraid.7_main b/man/lvmraid.7_main
index 5b429a410..db8009d46 100644
--- a/man/lvmraid.7_main
+++ b/man/lvmraid.7_main
@@ -434,7 +434,7 @@ found, run:
 # lvs -o name,raid_mismatch_count
 .EE
 .P
-Also, if mismatches were found, the
+Also, if mismatches were found, the
 .BR lvs (8)
 attr field will display the letter
 "m" (mismatch) in the 9th position, e.g.
@@ -862,6 +862,21 @@ those PVs can be reconstructed with:
 The rebuild option can be repeated with different PVs to replace the data
 on multiple PVs.
 .
+.SS Reactivating arrays after temporary device loss
+
+When a RAID array loses a critical number of devices,
+causing it to lose its ability to function reliably,
+the array will stop and require repair.
+
+Initiate repair process with this command:
+.P
+.B lvconvert --repair
+.I LV
+.P
+If the previously unavailable devices become accessible again,
+this repair process will update their metadata and
+the RAID array can be reactivated.
+.
 .SH DATA INTEGRITY
 .
 The device mapper integrity target can be used in combination with RAID
--
2.49.0

0014-WHATS_NEW-update.patch | 25 lines | Normal file

@@ -0,0 +1,25 @@
From 14f920c81c9499bcfcc0414c494f7abc0b9f3d58 Mon Sep 17 00:00:00 2001
From: Zdenek Kabelac <zkabelac@redhat.com>
Date: Mon, 12 May 2025 15:07:15 +0200
Subject: [PATCH 14/14] WHATS_NEW: update

(cherry picked from commit 759d1bfe113ca7a80248821808d13ed9e011201c)
---
 WHATS_NEW | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/WHATS_NEW b/WHATS_NEW
index 4d8b59681..c9af7be6e 100644
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,3 +1,7 @@
+Version 2.03.33 -
+==================
+ Repair raid arrays with transiently lost devices.
+
 Version 2.03.32 - 05th May 2025
 ===============================
 Lvconvert vdopool conversion propperly validates acceptable LVs.
--
2.49.0

lvm2.spec | 19 lines

@@ -54,7 +54,7 @@ Version: 2.03.32
%if 0%{?from_snapshot}
Release: 0.1.20211115git%{shortcommit}%{?dist}%{?rel_suffix}
%else
Release: 1%{?dist}%{?rel_suffix}
Release: 2%{?dist}%{?rel_suffix}
%endif
License: GPL-2.0-only
URL: https://sourceware.org/lvm2
@@ -70,6 +70,15 @@ Patch4: 0004-Revert-dm-udev-rules-don-t-export-and-save-DM_SUSPEN.patch
Patch5: 0005-Revert-11-dm-lvm.rules-don-t-restore-DM_UDEV_DISABLE.patch
Patch6: 0006-Revert-10-dm-rules-don-t-restore-DM_UDEV_DISABLE_OTH.patch
Patch7: 0007-WHATS_NEW_DM-update.patch
# RHEL-94577:
Patch8: 0008-lvmlockd-fix-hosts-check-for-vgremove.patch
Patch9: 0009-lvmlockd-fix-sanlock_release-for-vgremove.patch
# RHEL-67039:
Patch10: 0010-raid-count-or-clear-transiently-failed-devices.patch
Patch11: 0011-lvconvert-allow-clearing-superblocks.patch
Patch12: 0012-test-check-raid-superblock-clearing.patch
Patch13: 0013-man-update-raid-man.patch
Patch14: 0014-WHATS_NEW-update.patch

BuildRequires: make
BuildRequires: gcc
@@ -701,6 +710,14 @@ An extensive functional testsuite for LVM2.
%endif

%changelog
* Tue Jun 03 2025 Marian Csontos <mcsontos@redhat.com> - 2.03.32-2
- Fix vgremove hanging immediately after lockstart.
- Add repair option for RAID volumes with too many transiently failed devices.

* Tue May 06 2025 Marian Csontos <mcsontos@redhat.com> - 2.03.32-1
- Update to upstream version 2.03.32.
- See WHATS_NEW and WHATS_NEW_DM for more information.

* Mon Feb 03 2025 Marian Csontos <mcsontos@redhat.com> - 2.03.28-6
- Fix race causing lvm2 not recognizing active devices.