From d8f301b9244d93695b344c33e9ff7a116b5f17b7 Mon Sep 17 00:00:00 2001 From: Marian Csontos Date: Sun, 9 Aug 2020 16:42:09 +0200 Subject: [PATCH] Merge master up to commit c1d136fea3d1 (cherry picked from commit 4ef278fcb7c08721e973af7300fd1bff5d142398) --- WHATS_NEW | 11 +- conf/example.conf.in | 6 +- daemons/lvmdbusd/cmdhandler.py | 9 + daemons/lvmdbusd/lv.py | 45 +- daemons/lvmdbusd/manager.py | 2 +- device_mapper/all.h | 4 + device_mapper/libdm-deptree.c | 12 + include/configure.h.in | 6 +- lib/cache/lvmcache.c | 4 + lib/config/config_settings.h | 5 +- lib/config/defaults.h | 1 + lib/device/bcache.c | 2 +- lib/device/dev-cache.c | 1 + lib/device/dev-md.c | 206 ++++++--- lib/device/dev-type.c | 36 +- lib/label/hints.c | 4 +- lib/metadata/cache_manip.c | 4 + lib/metadata/integrity_manip.c | 61 ++- lib/metadata/lv.c | 3 + lib/metadata/lv_manip.c | 102 +++-- lib/metadata/merge.c | 3 +- lib/metadata/metadata-exported.h | 7 +- lib/metadata/metadata.c | 4 +- lib/metadata/pool_manip.c | 6 +- lib/metadata/raid_manip.c | 5 + lib/metadata/snapshot_manip.c | 2 - lib/metadata/writecache_manip.c | 365 ++++++++++++++-- lib/report/report.c | 18 +- lib/writecache/writecache.c | 49 +++ man/lvconvert.8_pregen | 58 +++ man/lvcreate.8_pregen | 246 ++++++++++- man/lvmcache.7_main | 85 +++- man/lvs.8_end | 4 + man/lvs.8_pregen | 4 + man/vgck.8_pregen | 9 + scripts/blkdeactivate.sh.in | 6 + test/dbus/lvmdbustest.py | 30 ++ test/lib/aux.sh | 1 + test/shell/cachevol-cachedevice.sh | 222 ++++++++++ test/shell/integrity-blocksize-2.sh | 128 ++++++ test/shell/integrity-blocksize-3.sh | 285 ++++++++++++ test/shell/integrity-blocksize.sh | 108 ++++- test/shell/integrity-large.sh | 23 +- test/shell/integrity-misc.sh | 27 +- test/shell/integrity.sh | 46 +- test/shell/lvconvert-m-raid1-degraded.sh | 6 +- test/shell/lvcreate-signature-wiping.sh | 7 + test/shell/lvcreate-thin.sh | 21 + test/shell/writecache-blocksize.sh | 342 +++++++++++++++ test/shell/writecache-large.sh | 153 +++++++ 
test/shell/writecache-split.sh | 34 +- test/shell/writecache.sh | 315 +++++++++----- tools/args.h | 17 +- tools/command-lines.in | 145 +++--- tools/command.c | 3 + tools/lvchange.c | 85 ++++ tools/lvconvert.c | 726 +++++++++++++++++++------------ tools/lvcreate.c | 153 ++++++- tools/lvmcmdline.c | 8 + tools/toollib.c | 164 +++++++ tools/toollib.h | 3 + tools/tools.h | 11 + 62 files changed, 3761 insertions(+), 697 deletions(-) create mode 100644 test/shell/cachevol-cachedevice.sh create mode 100644 test/shell/integrity-blocksize-2.sh create mode 100644 test/shell/integrity-blocksize-3.sh create mode 100644 test/shell/writecache-blocksize.sh create mode 100644 test/shell/writecache-large.sh diff --git a/WHATS_NEW b/WHATS_NEW index c0267b7..ac99e97 100644 --- a/WHATS_NEW +++ b/WHATS_NEW @@ -1,5 +1,14 @@ Version 2.03.10 - -================================= +================================== + Add writecache and integrity support to lvmdbusd. + Generate unique cachevol name when default required from lvcreate. + Converting RAID1 volume to one with same number of legs now succeeds with a + warning. + Fix conversion to raid from striped lagging type. + Fix conversion to 'mirrored' mirror log with larger regionsize. + Zero pool metadata on allocation (disable with allocation/zero_metadata=0). + Failure in zeroing or wiping will fail command (bypass with -Zn, -Wn). + Fix running out of free buffers for async writing for larger writes. Add integrity with raid capability. Fix support for lvconvert --repair used by foreign apps (i.e. Docker). diff --git a/conf/example.conf.in b/conf/example.conf.in index 88858fc..d5807e6 100644 --- a/conf/example.conf.in +++ b/conf/example.conf.in @@ -489,7 +489,7 @@ allocation { # This configuration option does not have a default value defined. # Configuration option allocation/thin_pool_metadata_require_separate_pvs. - # Thin pool metdata and data will always use different PVs. + # Thin pool metadata and data will always use different PVs. 
thin_pool_metadata_require_separate_pvs = 0 # Configuration option allocation/thin_pool_zero. @@ -527,6 +527,10 @@ allocation { # This configuration option has an automatic default value. # thin_pool_chunk_size_policy = "generic" + # Configuration option allocation/zero_metadata. + # Zero whole metadata area before use with thin or cache pool. + zero_metadata = 1 + # Configuration option allocation/thin_pool_chunk_size. # The minimal chunk size in KiB for thin pool volumes. # Larger chunk sizes may improve performance for plain thin volumes, diff --git a/daemons/lvmdbusd/cmdhandler.py b/daemons/lvmdbusd/cmdhandler.py index 7d2f4c4..1c15b78 100644 --- a/daemons/lvmdbusd/cmdhandler.py +++ b/daemons/lvmdbusd/cmdhandler.py @@ -453,6 +453,15 @@ def lv_cache_lv(cache_pool_full_name, lv_full_name, cache_options): return call(cmd) +def lv_writecache_lv(cache_lv_full_name, lv_full_name, cache_options): + # lvconvert --type writecache --cachevol VG/CacheLV VG/OriginLV + cmd = ['lvconvert'] + cmd.extend(options_to_cli_args(cache_options)) + cmd.extend(['-y', '--type', 'writecache', '--cachevol', + cache_lv_full_name, lv_full_name]) + return call(cmd) + + def lv_detach_cache(lv_full_name, detach_options, destroy_cache): cmd = ['lvconvert'] if destroy_cache: diff --git a/daemons/lvmdbusd/lv.py b/daemons/lvmdbusd/lv.py index fd46f34..edfdd0d 100644 --- a/daemons/lvmdbusd/lv.py +++ b/daemons/lvmdbusd/lv.py @@ -388,7 +388,7 @@ class LvCommon(AutomatedProperties): 'l': 'mirror log device', 'c': 'under conversion', 'V': 'thin Volume', 't': 'thin pool', 'T': 'Thin pool data', 'e': 'raid or pool metadata or pool metadata spare', - 'd': 'vdo pool', 'D': 'vdo pool data', + 'd': 'vdo pool', 'D': 'vdo pool data', 'g': 'integrity', '-': 'Unspecified'} return self.attr_struct(0, type_map) @@ -743,6 +743,49 @@ class Lv(LvCommon): cb, cbe, return_tuple=False) cfg.worker_q.put(r) + @staticmethod + def _writecache_lv(lv_uuid, lv_name, lv_object_path, cache_options): + # Make sure we have a dbus 
object representing it + dbo = LvCommon.validate_dbus_object(lv_uuid, lv_name) + + # Make sure we have dbus object representing lv to cache + lv_to_cache = cfg.om.get_object_by_path(lv_object_path) + + if lv_to_cache: + fcn = lv_to_cache.lv_full_name() + rc, out, err = cmdhandler.lv_writecache_lv( + dbo.lv_full_name(), fcn, cache_options) + if rc == 0: + # When we cache an LV, the cache pool and the lv that is getting + # cached need to be removed from the object manager and + # re-created as their interfaces have changed! + mt_remove_dbus_objects((dbo, lv_to_cache)) + cfg.load() + + lv_converted = cfg.om.get_object_path_by_lvm_id(fcn) + else: + raise dbus.exceptions.DBusException( + LV_INTERFACE, + 'Exit code %s, stderr = %s' % (str(rc), err)) + else: + raise dbus.exceptions.DBusException( + LV_INTERFACE, 'LV to cache with object path %s not present!' % + lv_object_path) + return lv_converted + + @dbus.service.method( + dbus_interface=LV_INTERFACE, + in_signature='oia{sv}', + out_signature='(oo)', + async_callbacks=('cb', 'cbe')) + def WriteCacheLv(self, lv_object, tmo, cache_options, cb, cbe): + r = RequestEntry( + tmo, Lv._writecache_lv, + (self.Uuid, self.lvm_id, lv_object, + cache_options), cb, cbe) + cfg.worker_q.put(r) + + # noinspection PyPep8Naming @utils.dbus_property(VDO_POOL_INTERFACE, 'OperatingMode', 's') @utils.dbus_property(VDO_POOL_INTERFACE, 'CompressionState', 's') diff --git a/daemons/lvmdbusd/manager.py b/daemons/lvmdbusd/manager.py index 2857e9a..573a396 100644 --- a/daemons/lvmdbusd/manager.py +++ b/daemons/lvmdbusd/manager.py @@ -27,7 +27,7 @@ class Manager(AutomatedProperties): @property def Version(self): - return dbus.String('1.0.0') + return dbus.String('1.1.0') @staticmethod def handle_execute(rc, out, err): diff --git a/device_mapper/all.h b/device_mapper/all.h index f00b6a5..c3c6219 100644 --- a/device_mapper/all.h +++ b/device_mapper/all.h @@ -951,6 +951,8 @@ struct writecache_settings { uint64_t autocommit_time; /* in milliseconds 
*/ uint32_t fua; uint32_t nofua; + uint32_t cleaner; + uint32_t max_age; /* * Allow an unrecognized key and its val to be passed to the kernel for @@ -970,6 +972,8 @@ struct writecache_settings { unsigned autocommit_time_set:1; unsigned fua_set:1; unsigned nofua_set:1; + unsigned cleaner_set:1; + unsigned max_age_set:1; }; int dm_tree_node_add_writecache_target(struct dm_tree_node *node, diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c index 9ba24cb..2722a2c 100644 --- a/device_mapper/libdm-deptree.c +++ b/device_mapper/libdm-deptree.c @@ -2670,6 +2670,10 @@ static int _writecache_emit_segment_line(struct dm_task *dmt, count += 1; if (seg->writecache_settings.nofua_set) count += 1; + if (seg->writecache_settings.cleaner_set && seg->writecache_settings.cleaner) + count += 1; + if (seg->writecache_settings.max_age_set) + count += 2; if (seg->writecache_settings.new_key) count += 2; @@ -2713,6 +2717,14 @@ static int _writecache_emit_segment_line(struct dm_task *dmt, EMIT_PARAMS(pos, " nofua"); } + if (seg->writecache_settings.cleaner_set && seg->writecache_settings.cleaner) { + EMIT_PARAMS(pos, " cleaner"); + } + + if (seg->writecache_settings.max_age_set) { + EMIT_PARAMS(pos, " max_age %u", seg->writecache_settings.max_age); + } + if (seg->writecache_settings.new_key) { EMIT_PARAMS(pos, " %s %s", seg->writecache_settings.new_key, diff --git a/include/configure.h.in b/include/configure.h.in index 57736cc..540cee7 100644 --- a/include/configure.h.in +++ b/include/configure.h.in @@ -531,6 +531,9 @@ /* Define to 1 if the system has the `__builtin_clzll' built-in function */ #undef HAVE___BUILTIN_CLZLL +/* Define to 1 to include built-in support for integrity. */ +#undef INTEGRITY_INTERNAL + /* Internalization package */ #undef INTL_PACKAGE @@ -678,9 +681,6 @@ /* Define to 1 to include built-in support for writecache. */ #undef WRITECACHE_INTERNAL -/* Define to 1 to include built-in support for integrity. 
*/ -#undef INTEGRITY_INTERNAL - /* Define to get access to GNU/Linux extension */ #undef _GNU_SOURCE diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 6cb5ff0..b1d05fb 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -84,6 +84,7 @@ static DM_LIST_INIT(_unused_duplicates); static DM_LIST_INIT(_prev_unused_duplicate_devs); static int _vgs_locked = 0; static int _found_duplicate_vgnames = 0; +static int _outdated_warning = 0; int lvmcache_init(struct cmd_context *cmd) { @@ -1776,6 +1777,9 @@ int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted) log_warn("WARNING: outdated PV %s seqno %u has been removed in current VG %s seqno %u.", dev_name(info->dev), info->summary_seqno, vg->name, vginfo->seqno); + if (!_outdated_warning++) + log_warn("See vgck --updatemetadata to clear outdated metadata."); + _drop_vginfo(info, vginfo); /* remove from vginfo->infos */ dm_list_add(&vginfo->outdated_infos, &info->list); } diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h index dce9705..b38ca11 100644 --- a/lib/config/config_settings.h +++ b/lib/config/config_settings.h @@ -626,7 +626,7 @@ cfg(allocation_cache_pool_max_chunks_CFG, "cache_pool_max_chunks", allocation_CF "Using cache pool with more chunks may degrade cache performance.\n") cfg(allocation_thin_pool_metadata_require_separate_pvs_CFG, "thin_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 89), NULL, 0, NULL, - "Thin pool metdata and data will always use different PVs.\n") + "Thin pool metadata and data will always use different PVs.\n") cfg(allocation_thin_pool_zero_CFG, "thin_pool_zero", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_ZERO, vsn(2, 2, 99), NULL, 0, NULL, "Thin pool data chunks are zeroed before they are first used.\n" @@ -657,6 +657,9 @@ 
cfg(allocation_thin_pool_chunk_size_policy_CFG, "thin_pool_chunk_size_policy", a " 512KiB.\n" "#\n") +cfg(allocation_zero_metadata_CFG, "zero_metadata", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ZERO_METADATA, vsn(2, 3, 10), NULL, 0, NULL, + "Zero whole metadata area before use with thin or cache pool.\n") + cfg_runtime(allocation_thin_pool_chunk_size_CFG, "thin_pool_chunk_size", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, vsn(2, 2, 99), 0, NULL, "The minimal chunk size in KiB for thin pool volumes.\n" "Larger chunk sizes may improve performance for plain thin volumes,\n" diff --git a/lib/config/defaults.h b/lib/config/defaults.h index be4f5ff..708a575 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -129,6 +129,7 @@ #define DEFAULT_THIN_POOL_DISCARDS "passdown" #define DEFAULT_THIN_POOL_ZERO 1 #define DEFAULT_POOL_METADATA_SPARE 1 /* thin + cache */ +#define DEFAULT_ZERO_METADATA 1 /* thin + cache */ #ifdef CACHE_CHECK_NEEDS_CHECK # define DEFAULT_CACHE_CHECK_OPTION1 "-q" diff --git a/lib/device/bcache.c b/lib/device/bcache.c index a7d8055..7e7e185 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -950,7 +950,7 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address i, b struct block *b; b = _alloc_block(cache); - while (!b && !dm_list_empty(&cache->clean)) { + while (!b) { b = _find_unused_clean_block(cache); if (!b) { if (can_wait) { diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c index 6af559c..c3f7c49 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -65,6 +65,7 @@ static int _insert(const char *path, const struct stat *info, static void _dev_init(struct device *dev) { dev->fd = -1; + dev->bcache_fd = -1; dev->read_ahead = -1; dev->ext.enabled = 0; diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c index 9d0a363..23ce41a 100644 --- a/lib/device/dev-md.c +++ b/lib/device/dev-md.c @@ -16,6 +16,7 @@ #include 
"lib/misc/lib.h" #include "lib/device/dev-type.h" #include "lib/mm/xlate.h" +#include "lib/misc/crc.h" #ifdef UDEV_SYNC_SUPPORT #include /* for MD detection using udev db records */ #include "lib/device/dev-ext-udev-constants.h" @@ -48,44 +49,89 @@ static int _dev_has_md_magic(struct device *dev, uint64_t sb_offset) return 0; } -/* - * Calculate the position of the superblock. - * It is always aligned to a 4K boundary and - * depending on minor_version, it can be: - * 0: At least 8K, but less than 12K, from end of device - * 1: At start of device - * 2: 4K from start of device. - */ -typedef enum { - MD_MINOR_VERSION_MIN, - MD_MINOR_V0 = MD_MINOR_VERSION_MIN, - MD_MINOR_V1, - MD_MINOR_V2, - MD_MINOR_VERSION_MAX = MD_MINOR_V2 -} md_minor_version_t; - -static uint64_t _v1_sb_offset(uint64_t size, md_minor_version_t minor_version) +#define IMSM_SIGNATURE "Intel Raid ISM Cfg Sig. " +#define IMSM_SIG_LEN (strlen(IMSM_SIGNATURE)) + +static int _dev_has_imsm_magic(struct device *dev, uint64_t devsize_sectors) { - uint64_t sb_offset; + char imsm_signature[IMSM_SIG_LEN]; + uint64_t off = (devsize_sectors * 512) - 1024; - switch(minor_version) { - case MD_MINOR_V0: - sb_offset = (size - 8 * 2) & ~(4 * 2 - 1ULL); - break; - case MD_MINOR_V1: - sb_offset = 0; - break; - case MD_MINOR_V2: - sb_offset = 4 * 2; - break; - default: - log_warn(INTERNAL_ERROR "WARNING: Unknown minor version %d.", - minor_version); + if (!dev_read_bytes(dev, off, IMSM_SIG_LEN, imsm_signature)) + return_0; + + if (!memcmp(imsm_signature, IMSM_SIGNATURE, IMSM_SIG_LEN)) + return 1; + + return 0; +} + +#define DDF_MAGIC 0xDE11DE11 +struct ddf_header { + uint32_t magic; + uint32_t crc; + char guid[24]; + char revision[8]; + char padding[472]; +}; + +static int _dev_has_ddf_magic(struct device *dev, uint64_t devsize_sectors, uint64_t *sb_offset) +{ + struct ddf_header hdr; + uint32_t crc, our_crc; + uint64_t off; + uint64_t devsize_bytes = devsize_sectors * 512; + + if (devsize_bytes < 0x30000) return 0; + 
+ /* 512 bytes before the end of device (from libblkid) */ + off = ((devsize_bytes / 0x200) - 1) * 0x200; + + if (!dev_read_bytes(dev, off, 512, &hdr)) + return_0; + + if ((hdr.magic == cpu_to_be32(DDF_MAGIC)) || + (hdr.magic == cpu_to_le32(DDF_MAGIC))) { + crc = hdr.crc; + hdr.crc = 0xffffffff; + our_crc = calc_crc(0, (const uint8_t *)&hdr, 512); + + if ((cpu_to_be32(our_crc) == crc) || + (cpu_to_le32(our_crc) == crc)) { + *sb_offset = off; + return 1; + } else { + log_debug_devs("Found md ddf magic at %llu wrong crc %x disk %x %s", + (unsigned long long)off, our_crc, crc, dev_name(dev)); + return 0; + } + } + + /* 128KB before the end of device (from libblkid) */ + off = ((devsize_bytes / 0x200) - 257) * 0x200; + + if (!dev_read_bytes(dev, off, 512, &hdr)) + return_0; + + if ((hdr.magic == cpu_to_be32(DDF_MAGIC)) || + (hdr.magic == cpu_to_le32(DDF_MAGIC))) { + crc = hdr.crc; + hdr.crc = 0xffffffff; + our_crc = calc_crc(0, (const uint8_t *)&hdr, 512); + + if ((cpu_to_be32(our_crc) == crc) || + (cpu_to_le32(our_crc) == crc)) { + *sb_offset = off; + return 1; + } else { + log_debug_devs("Found md ddf magic at %llu wrong crc %x disk %x %s", + (unsigned long long)off, our_crc, crc, dev_name(dev)); + return 0; + } } - sb_offset <<= SECTOR_SHIFT; - return sb_offset; + return 0; } /* @@ -130,7 +176,6 @@ static int _udev_dev_is_md_component(struct device *dev) */ static int _native_dev_is_md_component(struct device *dev, uint64_t *offset_found, int full) { - md_minor_version_t minor; uint64_t size, sb_offset; int ret; @@ -146,9 +191,9 @@ static int _native_dev_is_md_component(struct device *dev, uint64_t *offset_foun return 0; /* - * Old md versions locate the magic number at the end of the device. - * Those checks can't be satisfied with the initial bcache data, and - * would require an extra read i/o at the end of every device. Issuing + * Some md versions locate the magic number at the end of the device. 
+ * Those checks can't be satisfied with the initial scan data, and + * require an extra read i/o at the end of every device. Issuing * an extra read to every device in every command, just to check for * the old md format is a bad tradeoff. * @@ -159,42 +204,81 @@ static int _native_dev_is_md_component(struct device *dev, uint64_t *offset_foun * and set it for commands that could possibly write to an md dev * (pvcreate/vgcreate/vgextend). */ - if (!full) { - sb_offset = 0; - if (_dev_has_md_magic(dev, sb_offset)) { - log_debug_devs("Found md magic number at offset 0 of %s.", dev_name(dev)); - ret = 1; - goto out; - } - sb_offset = 8 << SECTOR_SHIFT; - if (_dev_has_md_magic(dev, sb_offset)) { - log_debug_devs("Found md magic number at offset %d of %s.", (int)sb_offset, dev_name(dev)); - ret = 1; - goto out; - } + /* + * md superblock version 1.1 at offset 0 from start + */ + + if (_dev_has_md_magic(dev, 0)) { + log_debug_devs("Found md magic number at offset 0 of %s.", dev_name(dev)); + ret = 1; + goto out; + } + /* + * md superblock version 1.2 at offset 4KB from start + */ + + if (_dev_has_md_magic(dev, 4096)) { + log_debug_devs("Found md magic number at offset 4096 of %s.", dev_name(dev)); + ret = 1; + goto out; + } + + if (!full) { ret = 0; goto out; } - /* Check if it is an md component device. */ - /* Version 0.90.0 */ + /* + * Handle superblocks at the end of the device. 
+ */ + + /* + * md superblock version 0 at 64KB from end of device + * (after end is aligned to 64KB) + */ + sb_offset = MD_NEW_SIZE_SECTORS(size) << SECTOR_SHIFT; + if (_dev_has_md_magic(dev, sb_offset)) { + log_debug_devs("Found md magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev)); ret = 1; goto out; } - minor = MD_MINOR_VERSION_MIN; - /* Version 1, try v1.0 -> v1.2 */ - do { - sb_offset = _v1_sb_offset(size, minor); - if (_dev_has_md_magic(dev, sb_offset)) { - ret = 1; - goto out; - } - } while (++minor <= MD_MINOR_VERSION_MAX); + /* + * md superblock version 1.0 at 8KB from end of device + */ + + sb_offset = ((size - 8 * 2) & ~(4 * 2 - 1ULL)) << SECTOR_SHIFT; + + if (_dev_has_md_magic(dev, sb_offset)) { + log_debug_devs("Found md magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev)); + ret = 1; + goto out; + } + + /* + * md imsm superblock 1K from end of device + */ + + if (_dev_has_imsm_magic(dev, size)) { + log_debug_devs("Found md imsm magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev)); + sb_offset = 1024; + ret = 1; + goto out; + } + + /* + * md ddf superblock 512 bytes from end, or 128KB from end + */ + + if (_dev_has_ddf_magic(dev, size, &sb_offset)) { + log_debug_devs("Found md ddf magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev)); + ret = 1; + goto out; + } ret = 0; out: diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index deb5d6a..896821d 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -649,37 +649,23 @@ out: #ifdef BLKID_WIPING_SUPPORT int get_fs_block_size(struct device *dev, uint32_t *fs_block_size) { - blkid_probe probe = NULL; - const char *block_size_str = NULL; - uint64_t block_size_val; - int r = 0; + char *block_size_str = NULL; - *fs_block_size = 0; - - if (!(probe = blkid_new_probe_from_filename(dev_name(dev)))) { - log_error("Failed to create a new blkid probe for device %s.", 
dev_name(dev)); - goto out; + if ((block_size_str = blkid_get_tag_value(NULL, "BLOCK_SIZE", dev_name(dev)))) { + *fs_block_size = (uint32_t)atoi(block_size_str); + free(block_size_str); + log_debug("Found blkid BLOCK_SIZE %u for fs on %s", *fs_block_size, dev_name(dev)); + return 1; + } else { + log_debug("No blkid BLOCK_SIZE for fs on %s", dev_name(dev)); + *fs_block_size = 0; + return 0; } - - blkid_probe_enable_partitions(probe, 1); - - (void) blkid_probe_lookup_value(probe, "BLOCK_SIZE", &block_size_str, NULL); - - if (!block_size_str) - goto out; - - block_size_val = strtoull(block_size_str, NULL, 10); - - *fs_block_size = (uint32_t)block_size_val; - r = 1; -out: - if (probe) - blkid_free_probe(probe); - return r; } #else int get_fs_block_size(struct device *dev, uint32_t *fs_block_size) { + log_debug("Disabled blkid BLOCK_SIZE for fs."); *fs_block_size = 0; return 0; } diff --git a/lib/label/hints.c b/lib/label/hints.c index 9546f48..efa02f7 100644 --- a/lib/label/hints.c +++ b/lib/label/hints.c @@ -801,10 +801,8 @@ static int _read_hint_file(struct cmd_context *cmd, struct dm_list *hints, int * if (fclose(fp)) stack; - if (!ret) { - free_hints(hints); + if (!ret) return 0; - } if (!found) return 1; diff --git a/lib/metadata/cache_manip.c b/lib/metadata/cache_manip.c index 49b3850..a786e8b 100644 --- a/lib/metadata/cache_manip.c +++ b/lib/metadata/cache_manip.c @@ -1094,6 +1094,10 @@ int cache_vol_set_params(struct cmd_context *cmd, if (!meta_size) { meta_size = _cache_min_metadata_size(pool_lv->size, chunk_size); + /* fix bad value from _cache_min_metadata_size */ + if (meta_size > (pool_lv->size / 2)) + meta_size = pool_lv->size / 2; + if (meta_size < min_meta_size) meta_size = min_meta_size; diff --git a/lib/metadata/integrity_manip.c b/lib/metadata/integrity_manip.c index 7942be0..3322a21 100644 --- a/lib/metadata/integrity_manip.c +++ b/lib/metadata/integrity_manip.c @@ -21,7 +21,6 @@ #include "lib/metadata/segtype.h" #include "lib/activate/activate.h" 
#include "lib/config/defaults.h" -#include "lib/activate/dev_manager.h" #define DEFAULT_TAG_SIZE 4 /* bytes */ #define DEFAULT_MODE 'J' @@ -29,6 +28,7 @@ #define DEFAULT_BLOCK_SIZE 512 #define ONE_MB_IN_BYTES 1048576 +#define ONE_GB_IN_BYTES 1073741824 int lv_is_integrity_origin(const struct logical_volume *lv) { @@ -46,10 +46,35 @@ int lv_is_integrity_origin(const struct logical_volume *lv) /* * Every 500M of data needs 4M of metadata. * (From trial and error testing.) + * + * plus some initial space for journals. + * (again from trial and error testing.) */ static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes) { - return ((lv_size_bytes / (500 * ONE_MB_IN_BYTES)) + 1) * (4 * ONE_MB_IN_BYTES); + uint64_t meta_bytes; + uint64_t initial_bytes; + + /* Every 500M of data needs 4M of metadata. */ + meta_bytes = ((lv_size_bytes / (500 * ONE_MB_IN_BYTES)) + 1) * (4 * ONE_MB_IN_BYTES); + + /* + * initial space used for journals + * lv_size <= 512M -> 4M + * lv_size <= 1G -> 8M + * lv_size <= 4G -> 32M + * lv_size > 4G -> 64M + */ + if (lv_size_bytes <= (512 * ONE_MB_IN_BYTES)) + initial_bytes = 4 * ONE_MB_IN_BYTES; + else if (lv_size_bytes <= ONE_GB_IN_BYTES) + initial_bytes = 8 * ONE_MB_IN_BYTES; + else if (lv_size_bytes <= (4ULL * ONE_GB_IN_BYTES)) + initial_bytes = 32 * ONE_MB_IN_BYTES; + else if (lv_size_bytes > (4ULL * ONE_GB_IN_BYTES)) + initial_bytes = 64 * ONE_MB_IN_BYTES; + + return meta_bytes + initial_bytes; } /* @@ -278,7 +303,7 @@ int lv_remove_integrity_from_raid(struct logical_volume *lv) return 1; } -static int _set_integrity_block_size(struct cmd_context *cmd, struct logical_volume *lv, +static int _set_integrity_block_size(struct cmd_context *cmd, struct logical_volume *lv, int is_active, struct integrity_settings *settings, int lbs_4k, int lbs_512, int pbs_4k, int pbs_512) { @@ -375,7 +400,13 @@ static int _set_integrity_block_size(struct cmd_context *cmd, struct logical_vol } if (!settings->block_size) { - if (fs_block_size <= 
4096) + if (is_active && lbs_512) { + /* increasing the lbs from 512 to 4k under an active LV could cause problems + for an application that expects a given io size/alignment is possible. */ + settings->block_size = 512; + if (fs_block_size > 512) + log_print("Limiting integrity block size to 512 because the LV is active."); + } else if (fs_block_size <= 4096) settings->block_size = fs_block_size; else settings->block_size = 4096; /* dm-integrity max is 4096 */ @@ -587,13 +618,33 @@ int lv_add_integrity_to_raid(struct logical_volume *lv, struct integrity_setting } } + if (!is_active) { + /* checking block size of fs on the lv requires the lv to be active */ + if (!activate_lv(cmd, lv)) { + log_error("Failed to activate LV to check block size %s", display_lvname(lv)); + goto bad; + } + if (!sync_local_dev_names(cmd)) + stack; + } + /* * Set settings->block_size which will be copied to segment settings below. * integrity block size chosen based on device logical block size and * file system block size. 
*/ - if (!_set_integrity_block_size(cmd, lv, settings, lbs_4k, lbs_512, pbs_4k, pbs_512)) + if (!_set_integrity_block_size(cmd, lv, is_active, settings, lbs_4k, lbs_512, pbs_4k, pbs_512)) { + if (!is_active && !deactivate_lv(cmd, lv)) + stack; goto_bad; + } + + if (!is_active) { + if (!deactivate_lv(cmd, lv)) { + log_error("Failed to deactivate LV after checking block size %s", display_lvname(lv)); + goto bad; + } + } /* * For each rimage, move its segments to a new rimage_iorig and give diff --git a/lib/metadata/lv.c b/lib/metadata/lv.c index 4ee58b4..fac47e5 100644 --- a/lib/metadata/lv.c +++ b/lib/metadata/lv.c @@ -1412,6 +1412,9 @@ char *lv_attr_dup_with_info_and_seg_status(struct dm_pool *mem, const struct lv_ } else if (lvdm->seg_status.type == SEG_STATUS_THIN) { if (lvdm->seg_status.thin->fail) repstr[8] = 'F'; + } else if (lvdm->seg_status.type == SEG_STATUS_WRITECACHE) { + if (lvdm->seg_status.writecache->error) + repstr[8] = 'E'; } else if (lvdm->seg_status.type == SEG_STATUS_UNKNOWN) repstr[8] = 'X'; /* Unknown */ diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c index 1311f70..f0ba3f0 100644 --- a/lib/metadata/lv_manip.c +++ b/lib/metadata/lv_manip.c @@ -5066,6 +5066,7 @@ static int _lvresize_check(struct logical_volume *lv, struct lvresize_params *lp) { struct volume_group *vg = lv->vg; + struct lv_segment *seg = first_seg(lv); if (lv_is_external_origin(lv)) { /* @@ -5089,6 +5090,12 @@ static int _lvresize_check(struct logical_volume *lv, return 0; } + if (seg && (seg_is_raid4(seg) || seg_is_any_raid5(seg)) && seg->area_count < 3) { + log_error("Cannot resize %s LV %s. 
Convert to more stripes first.", + lvseg_name(seg), display_lvname(lv)); + return 0; + } + if (lv_is_raid(lv) && lp->resize == LV_REDUCE) { unsigned attrs; @@ -6568,7 +6575,20 @@ int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv, } } - if (lv_is_used_cache_pool(lv) || lv_is_cache_vol(lv)) { + if (lv_is_cache_vol(lv)) { + if ((cache_seg = get_only_segment_using_this_lv(lv))) { + /* When used with cache, lvremove on cachevol also removes the cache! */ + if (seg_is_cache(cache_seg)) { + if (!lv_cache_remove(cache_seg->lv)) + return_0; + } else if (seg_is_writecache(cache_seg)) { + log_error("Detach cachevol before removing."); + return 0; + } + } + } + + if (lv_is_used_cache_pool(lv)) { /* Cache pool removal drops cache layer * If the cache pool is not linked, we can simply remove it. */ if (!(cache_seg = get_only_segment_using_this_lv(lv))) @@ -6832,7 +6852,7 @@ static int _lv_update_and_reload(struct logical_volume *lv, int origin_only) } if (!(origin_only ? suspend_lv_origin(vg->cmd, lock_lv) : suspend_lv(vg->cmd, lock_lv))) { - log_error("Failed to lock logical volume %s.", + log_error("Failed to suspend logical volume %s.", display_lvname(lock_lv)); vg_revert(vg); } else if (!(r = vg_commit(vg))) @@ -7556,20 +7576,22 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp) struct device *dev; char name[PATH_MAX]; uint64_t zero_sectors; + int zero_metadata = wp.is_metadata ? 
+ find_config_tree_bool(lv->vg->cmd, allocation_zero_metadata_CFG, NULL) : 0; - if (!wp.do_zero && !wp.do_wipe_signatures) + if (!wp.do_zero && !wp.do_wipe_signatures && !wp.is_metadata) /* nothing to do */ return 1; if (!lv_is_active(lv)) { - log_error("Volume \"%s/%s\" is not active locally (volume_list activation filter?).", - lv->vg->name, lv->name); + log_error("Volume %s is not active locally (volume_list activation filter?).", + display_lvname(lv)); return 0; } /* Wait until devices are available */ if (!sync_local_dev_names(lv->vg->cmd)) { - log_error("Failed to sync local devices before wiping LV %s.", + log_error("Failed to sync local devices before wiping volume %s.", display_lvname(lv)); return 0; } @@ -7593,40 +7615,59 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp) } if (!label_scan_open_rw(dev)) { - log_error("Failed to open %s/%s for wiping and zeroing.", lv->vg->name, lv->name); - goto out; + log_error("Failed to open %s for wiping and zeroing.", display_lvname(lv)); + return 0; } if (wp.do_wipe_signatures) { - log_verbose("Wiping known signatures on logical volume \"%s/%s\"", - lv->vg->name, lv->name); + log_verbose("Wiping known signatures on logical volume %s.", + display_lvname(lv)); if (!wipe_known_signatures(lv->vg->cmd, dev, name, 0, TYPE_DM_SNAPSHOT_COW, - wp.yes, wp.force, NULL)) - stack; + wp.yes, wp.force, NULL)) { + log_error("Filed to wipe signatures of logical volume %s.", + display_lvname(lv)); + return 0; + } } - if (wp.do_zero) { - zero_sectors = wp.zero_sectors ? : UINT64_C(4096) >> SECTOR_SHIFT; - - if (zero_sectors > lv->size) + if (wp.do_zero || wp.is_metadata) { + zero_metadata = !wp.is_metadata ? 
0 : + find_config_tree_bool(lv->vg->cmd, allocation_zero_metadata_CFG, NULL); + if (zero_metadata) { + log_debug("Metadata logical volume %s will be fully zeroed.", + display_lvname(lv)); zero_sectors = lv->size; + } else { + if (wp.is_metadata) /* Verbosely notify metadata will not be fully zeroed */ + log_verbose("Metadata logical volume %s not fully zeroed and may contain stale data.", + display_lvname(lv)); + zero_sectors = UINT64_C(4096) >> SECTOR_SHIFT; + if (wp.zero_sectors > zero_sectors) + zero_sectors = wp.zero_sectors; - log_verbose("Initializing %s of logical volume \"%s/%s\" with value %d.", - display_size(lv->vg->cmd, zero_sectors), - lv->vg->name, lv->name, wp.zero_value); + if (zero_sectors > lv->size) + zero_sectors = lv->size; + } - if (!wp.zero_value) { - if (!dev_write_zeros(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT)) - stack; - } else { - if (!dev_set_bytes(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, (uint8_t)wp.zero_value)) - stack; + log_verbose("Initializing %s of logical volume %s with value %d.", + display_size(lv->vg->cmd, zero_sectors), + display_lvname(lv), wp.zero_value); + + if ((!wp.is_metadata && + wp.zero_value && !dev_set_bytes(dev, UINT64_C(0), + (size_t) zero_sectors << SECTOR_SHIFT, + (uint8_t)wp.zero_value)) || + !dev_write_zeros(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT)) { + log_error("Failed to initialize %s of logical volume %s with value %d.", + display_size(lv->vg->cmd, zero_sectors), + display_lvname(lv), wp.zero_value); + return 0; } } label_scan_invalidate(dev); -out: + lv->status &= ~LV_NOSCAN; return 1; @@ -7690,12 +7731,10 @@ int activate_and_wipe_lvlist(struct dm_list *lv_list, int commit) } dm_list_iterate_items(lvl, lv_list) { - log_verbose("Wiping metadata area %s.", display_lvname(lvl->lv)); /* Wipe any know signatures */ - if (!wipe_lv(lvl->lv, (struct wipe_params) { .do_wipe_signatures = 1, .do_zero = 1, .zero_sectors = 1 })) { - log_error("Failed to wipe %s.", 
display_lvname(lvl->lv)); + if (!wipe_lv(lvl->lv, (struct wipe_params) { .do_zero = 1 /* TODO: is_metadata = 1 */ })) { r = 0; - goto out; + goto_out; } } out: @@ -8440,7 +8479,8 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, .do_zero = lp->zero, .do_wipe_signatures = lp->wipe_signatures, .yes = lp->yes, - .force = lp->force + .force = lp->force, + .is_metadata = lp->is_metadata, })) { log_error("Aborting. Failed to wipe %s.", lp->snapshot ? "snapshot exception store" : "start of new LV"); diff --git a/lib/metadata/merge.c b/lib/metadata/merge.c index ecd55ef..1d47449 100644 --- a/lib/metadata/merge.c +++ b/lib/metadata/merge.c @@ -441,7 +441,8 @@ static void _check_lv_segment(struct logical_volume *lv, struct lv_segment *seg, if (seg_is_mirror(seg)) { if (!seg->region_size) seg_error("region size is zero"); - else if (seg->region_size > seg->lv->size) + /* Avoid regionsize check in case of 'mirrored' mirror log or larger than mlog regionsize will fail */ + else if (!strstr(seg->lv->name, "_mlog") && (seg->region_size > seg->lv->size)) seg_error("region size is bigger then LV itself"); else if (!is_power_of_2(seg->region_size)) seg_error("region size is non power of 2"); diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 083f74a..06ea757 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -89,8 +89,7 @@ #define PARTIAL_LV UINT64_C(0x0000000001000000) /* LV - derived flag, not written out in metadata*/ -//#define POSTORDER_FLAG UINT64_C(0x0000000002000000) /* Not real flags, reserved for -//#define POSTORDER_OPEN_FLAG UINT64_C(0x0000000004000000) temporary use inside vg_read_internal. 
*/ +#define WRITECACHE_ORIGIN UINT64_C(0x0000000002000000) #define INTEGRITY_METADATA UINT64_C(0x0000000004000000) /* LV - Internal use only */ #define VIRTUAL_ORIGIN UINT64_C(0x0000000008000000) /* LV - internal use only */ @@ -804,6 +803,7 @@ struct wipe_params { int do_wipe_signatures; /* should we wipe known signatures found on LV? */ int yes; /* answer yes automatically to all questions */ force_t force; /* force mode */ + int is_metadata; /* wipe volume is metadata LV */ }; /* Zero out LV and/or wipe signatures */ @@ -955,6 +955,8 @@ struct lvcreate_params { int thin_chunk_size_calc_policy; unsigned suppress_zero_warn : 1; unsigned needs_lockd_init : 1; + unsigned ignore_type : 1; + unsigned is_metadata : 1; /* created LV will be used as metadata LV (and can be zeroed) */ const char *vg_name; /* only-used when VG is not yet opened (in /tools) */ const char *lv_name; /* all */ @@ -1097,6 +1099,7 @@ int lv_is_cow(const struct logical_volume *lv); int lv_is_cache_origin(const struct logical_volume *lv); int lv_is_writecache_origin(const struct logical_volume *lv); int lv_is_writecache_cachevol(const struct logical_volume *lv); +int writecache_settings_to_str_list(struct writecache_settings *settings, struct dm_list *result, struct dm_pool *mem); int lv_is_integrity_origin(const struct logical_volume *lv); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 4b8dce9..c0d4206 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -4875,8 +4875,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, } } - if (found_old_metadata) + if (found_old_metadata) { log_warn("WARNING: Inconsistent metadata found for VG %s.", vgname); + log_warn("See vgck --updatemetadata to correct inconsistency."); + } vg = NULL; diff --git a/lib/metadata/pool_manip.c b/lib/metadata/pool_manip.c index bed51f1..23b5b63 100644 --- a/lib/metadata/pool_manip.c +++ b/lib/metadata/pool_manip.c @@ -545,8 +545,8 @@ int create_pool(struct logical_volume 
*pool_lv, display_lvname(pool_lv)); goto bad; } - /* Clear 4KB of pool metadata device. */ - if (!(r = wipe_lv(pool_lv, (struct wipe_params) { .do_zero = 1 }))) { + /* Clear pool metadata device. */ + if (!(r = wipe_lv(pool_lv, (struct wipe_params) { .is_metadata = 1 }))) { log_error("Aborting. Failed to wipe pool metadata %s.", display_lvname(pool_lv)); } @@ -627,6 +627,7 @@ struct logical_volume *alloc_pool_metadata(struct logical_volume *pool_lv, .tags = DM_LIST_HEAD_INIT(lvc.tags), .temporary = 1, .zero = 1, + .is_metadata = 1, }; if (!(lvc.segtype = get_segtype_from_string(pool_lv->vg->cmd, SEG_TYPE_NAME_STRIPED))) @@ -663,6 +664,7 @@ static struct logical_volume *_alloc_pool_metadata_spare(struct volume_group *vg .tags = DM_LIST_HEAD_INIT(lp.tags), .temporary = 1, .zero = 1, + .is_metadata = 1, }; if (!(lp.segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_STRIPED))) diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c index 3b3e1d3..1ff2a0c 100644 --- a/lib/metadata/raid_manip.c +++ b/lib/metadata/raid_manip.c @@ -3229,6 +3229,11 @@ int lv_raid_change_image_count(struct logical_volume *lv, int yes, uint32_t new_ const char *level = seg->area_count == 1 ? "raid1 with " : ""; const char *resil = new_count < seg->area_count ? 
"reducing" : "enhancing"; + if (new_count == seg->area_count) { + log_warn("Type %s LV %s already has %u images.", lvseg_name(seg), display_lvname(lv), new_count); + return 1; + } + /* LV must be active to perform raid conversion operations */ if (!lv_is_active(lv)) { log_error("%s must be active to perform this operation.", diff --git a/lib/metadata/snapshot_manip.c b/lib/metadata/snapshot_manip.c index 3faea0e..0f48e62 100644 --- a/lib/metadata/snapshot_manip.c +++ b/lib/metadata/snapshot_manip.c @@ -389,8 +389,6 @@ int validate_snapshot_origin(const struct logical_volume *origin_lv) err = "raid subvolumes"; } else if (lv_is_raid(origin_lv) && lv_raid_has_integrity((struct logical_volume *)origin_lv)) { err = "raid with integrity"; - } else if (lv_is_writecache(origin_lv)) { - err = "writecache"; } if (err) { diff --git a/lib/metadata/writecache_manip.c b/lib/metadata/writecache_manip.c index 31d069e..fade82e 100644 --- a/lib/metadata/writecache_manip.c +++ b/lib/metadata/writecache_manip.c @@ -21,11 +21,21 @@ #include "lib/metadata/segtype.h" #include "lib/activate/activate.h" #include "lib/config/defaults.h" +#include "lib/datastruct/str_list.h" int lv_is_writecache_origin(const struct logical_volume *lv) { struct lv_segment *seg; + /* + * This flag is needed when removing writecache from an origin + * in which case the lv connections have been destroyed and + * identifying a writecache origin by these connections doesn't + * work. + */ + if (lv->status & WRITECACHE_ORIGIN) + return 1; + /* Make sure there's exactly one segment in segs_using_this_lv! 
*/ if (dm_list_empty(&lv->segs_using_this_lv) || (dm_list_size(&lv->segs_using_this_lv) > 1)) @@ -48,46 +58,6 @@ int lv_is_writecache_cachevol(const struct logical_volume *lv) return 0; } -static int _lv_writecache_detach(struct cmd_context *cmd, struct logical_volume *lv, - struct logical_volume *lv_fast) -{ - struct lv_segment *seg = first_seg(lv); - struct logical_volume *origin; - - if (!seg_is_writecache(seg)) { - log_error("LV %s segment is not writecache.", display_lvname(lv)); - return 0; - } - - if (!seg->writecache) { - log_error("LV %s writecache segment has no writecache.", display_lvname(lv)); - return 0; - } - - if (!(origin = seg_lv(seg, 0))) { - log_error("LV %s writecache segment has no origin", display_lvname(lv)); - return 0; - } - - if (!remove_seg_from_segs_using_this_lv(seg->writecache, seg)) - return_0; - - lv_set_visible(seg->writecache); - - lv->status &= ~WRITECACHE; - seg->writecache = NULL; - - lv_fast->status &= ~LV_CACHE_VOL; - - if (!remove_layer_from_lv(lv, origin)) - return_0; - - if (!lv_remove(origin)) - return_0; - - return 1; -} - static int _get_writecache_kernel_error(struct cmd_context *cmd, struct logical_volume *lv, uint32_t *kernel_error) @@ -131,13 +101,64 @@ fail: return 0; } -int lv_detach_writecache_cachevol(struct logical_volume *lv, int noflush) +static void _rename_detached_cvol(struct cmd_context *cmd, struct logical_volume *lv_fast) +{ + struct volume_group *vg = lv_fast->vg; + char cvol_name[NAME_LEN]; + char *suffix, *cvol_name_dup; + + /* + * Rename lv_fast back to its original name, without the _cvol + * suffix that was added when lv_fast was attached for caching. + * If the name is in use, generate new lvol%d. + * Failing to rename is not really a problem, so we intentionally + * do not consider some things here as errors. 
+ */ + if (!dm_strncpy(cvol_name, lv_fast->name, sizeof(cvol_name)) || + !(suffix = strstr(cvol_name, "_cvol"))) { + log_debug("LV %s has no suffix for cachevol (skipping rename).", + display_lvname(lv_fast)); + return; + } + + *suffix = 0; + if (lv_name_is_used_in_vg(vg, cvol_name, NULL) && + !generate_lv_name(vg, "lvol%d", cvol_name, sizeof(cvol_name))) { + log_warn("Failed to generate new unique name for unused LV %s", lv_fast->name); + return; + } + + if (!(cvol_name_dup = dm_pool_strdup(vg->vgmem, cvol_name))) { + stack; + return; + } + + lv_fast->name = cvol_name_dup; +} + +static int _lv_detach_writecache_cachevol_inactive(struct logical_volume *lv, int noflush) { struct cmd_context *cmd = lv->vg->cmd; + struct volume_group *vg = lv->vg; struct logical_volume *lv_fast; + struct logical_volume *lv_wcorig; + struct lv_segment *seg = first_seg(lv); uint32_t kernel_error = 0; - lv_fast = first_seg(lv)->writecache; + if (!seg_is_writecache(seg)) { + log_error("LV %s segment is not writecache.", display_lvname(lv)); + return 0; + } + + if (!(lv_fast = seg->writecache)) { + log_error("LV %s writecache segment has no writecache.", display_lvname(lv)); + return 0; + } + + if (!(lv_wcorig = seg_lv(seg, 0))) { + log_error("LV %s writecache segment has no origin", display_lvname(lv)); + return 0; + } if (noflush) goto detach; @@ -157,6 +178,8 @@ int lv_detach_writecache_cachevol(struct logical_volume *lv, int noflush) if (!sync_local_dev_names(cmd)) { log_error("Failed to sync local devices before detaching writecache."); + if (!deactivate_lv(cmd, lv)) + log_error("Failed to deactivate %s.", display_lvname(lv)); return 0; } @@ -176,7 +199,8 @@ int lv_detach_writecache_cachevol(struct logical_volume *lv, int noflush) if (kernel_error) { log_error("Failed to flush writecache (error %u) for %s.", kernel_error, display_lvname(lv)); - deactivate_lv(cmd, lv); + if (!deactivate_lv(cmd, lv)) + log_error("Failed to deactivate %s.", display_lvname(lv)); return 0; } @@ -188,11 
+212,262 @@ int lv_detach_writecache_cachevol(struct logical_volume *lv, int noflush) lv->status &= ~LV_TEMPORARY; detach: - if (!_lv_writecache_detach(cmd, lv, lv_fast)) { - log_error("Failed to detach writecache from %s", display_lvname(lv)); + if (!remove_seg_from_segs_using_this_lv(lv_fast, seg)) + return_0; + + lv->status &= ~WRITECACHE; + seg->writecache = NULL; + + if (!remove_layer_from_lv(lv, lv_wcorig)) + return_0; + + if (!lv_remove(lv_wcorig)) + return_0; + + lv_set_visible(lv_fast); + lv_fast->status &= ~LV_CACHE_VOL; + + _rename_detached_cvol(cmd, lv_fast); + + if (!vg_write(vg) || !vg_commit(vg)) + return_0; + + return 1; +} + +static int _lv_detach_writecache_cachevol_active(struct logical_volume *lv, int noflush) +{ + struct cmd_context *cmd = lv->vg->cmd; + struct volume_group *vg = lv->vg; + struct logical_volume *lv_fast; + struct logical_volume *lv_wcorig; + struct logical_volume *lv_old; + struct lv_segment *seg = first_seg(lv); + uint32_t kernel_error = 0; + + if (!seg_is_writecache(seg)) { + log_error("LV %s segment is not writecache.", display_lvname(lv)); + return 0; + } + + if (!(lv_fast = seg->writecache)) { + log_error("LV %s writecache segment has no writecache.", display_lvname(lv)); return 0; } + if (!(lv_wcorig = seg_lv(seg, 0))) { + log_error("LV %s writecache segment has no origin", display_lvname(lv)); + return 0; + } + + if (noflush) + goto detach; + + if (!lv_writecache_message(lv, "flush_on_suspend")) { + log_error("Failed to set flush_on_suspend in writecache detach %s.", display_lvname(lv)); + return 0; + } + + detach: + if (!remove_seg_from_segs_using_this_lv(lv_fast, seg)) { + log_error("Failed to remove seg in writecache detach."); + return 0; + } + + lv->status &= ~WRITECACHE; + seg->writecache = NULL; + + if (!remove_layer_from_lv(lv, lv_wcorig)) { + log_error("Failed to remove lv layer in writecache detach."); + return 0; + } + + /* + * vg_write(), suspend_lv(), vg_commit(), resume_lv(). 
+ * usually done by lv_update_and_reload for an active lv, + * but in this case we need to check for writecache errors + * after suspend. + */ + + if (!vg_write(vg)) { + log_error("Failed to write VG in writecache detach."); + return 0; + } + + /* + * The version of LV before removal of writecache. When need to + * check for kernel errors based on the old version of LV which + * is still present in the kernel. + */ + if (!(lv_old = (struct logical_volume *)lv_committed(lv))) { + log_error("Failed to get lv_committed in writecache detach."); + return 0; + } + + /* + * suspend does not use 'lv' as we know it here, but grabs the + * old (precommitted) version of 'lv' using lv_committed(), + * which is from vg->vg_comitted. + */ + log_debug("Suspending writecache to detach %s", display_lvname(lv)); + + if (!suspend_lv(cmd, lv)) { + log_error("Failed to suspend LV in writecache detach."); + vg_revert(vg); + return 0; + } + + log_debug("Checking writecache errors to detach."); + + if (!_get_writecache_kernel_error(cmd, lv_old, &kernel_error)) { + log_error("Failed to get writecache error status for %s.", display_lvname(lv_old)); + return 0; + } + + if (kernel_error) { + log_error("Failed to flush writecache (error %u) for %s.", kernel_error, display_lvname(lv)); + return 0; + } + + if (!vg_commit(vg)) { + log_error("Failed to commit VG in writecache detach."); + return 0; + } + + /* + * Since vg_commit has happened, vg->vg_committed is now the + * newest copy of lv, so resume uses the 'lv' that we know + * here. 
+ */ + log_debug("Resuming after writecache detached %s", display_lvname(lv)); + + if (!resume_lv(cmd, lv)) { + log_error("Failed to resume LV in writecache detach."); + return 0; + } + + log_debug("Deactivating previous cachevol %s", display_lvname(lv_fast)); + + if (!deactivate_lv(cmd, lv_fast)) + log_error("Failed to deactivate previous cachevol in writecache detach."); + + /* + * Needed for lv_is_writecache_origin to know lv_wcorig was + * a writecache origin, which is needed so that the -real + * dm uuid suffix is applied, which is needed for deactivate to + * work. This is a hacky roundabout way of setting the -real + * uuid suffix (it would be nice to have a deactivate command + * that accepts a dm uuid.) + */ + lv_wcorig->status |= WRITECACHE_ORIGIN; + + log_debug("Deactivating previous wcorig %s", display_lvname(lv_wcorig)); + + if (!lv_deactivate(cmd, NULL, lv_wcorig)) + log_error("Failed to deactivate previous wcorig LV in writecache detach."); + + log_debug("Removing previous wcorig %s", display_lvname(lv_wcorig)); + + if (!lv_remove(lv_wcorig)) { + log_error("Failed to remove previous wcorig LV in writecache detach."); + return 0; + } + + lv_set_visible(lv_fast); + lv_fast->status &= ~LV_CACHE_VOL; + + _rename_detached_cvol(cmd, lv_fast); + + if (!vg_write(vg) || !vg_commit(vg)) { + log_error("Failed to write and commit VG in writecache detach."); + return 0; + } + + return 1; +} + +int lv_detach_writecache_cachevol(struct logical_volume *lv, int noflush) +{ + if (lv_is_active(lv)) + return _lv_detach_writecache_cachevol_active(lv, noflush); + else + return _lv_detach_writecache_cachevol_inactive(lv, noflush); +} + +static int _writecache_setting_str_list_add(const char *field, uint64_t val, char *val_str, struct dm_list *result, struct dm_pool *mem) +{ + char buf[128]; + char *list_item; + int len; + + if (val_str) { + if (dm_snprintf(buf, sizeof(buf), "%s=%s", field, val_str) < 0) + return_0; + } else { + if (dm_snprintf(buf, sizeof(buf), "%s=%llu", 
field, (unsigned long long)val) < 0) + return_0; + } + + len = strlen(buf) + 1; + + if (!(list_item = dm_pool_zalloc(mem, len))) + return_0; + + memcpy(list_item, buf, len); + + if (!str_list_add_no_dup_check(mem, result, list_item)) + return_0; + + return 1; +} + +int writecache_settings_to_str_list(struct writecache_settings *settings, struct dm_list *result, struct dm_pool *mem) +{ + int errors = 0; + + if (settings->high_watermark_set) + if (!_writecache_setting_str_list_add("high_watermark", settings->high_watermark, NULL, result, mem)) + errors++; + + if (settings->low_watermark_set) + if (!_writecache_setting_str_list_add("low_watermark", settings->low_watermark, NULL, result, mem)) + errors++; + + if (settings->writeback_jobs_set) + if (!_writecache_setting_str_list_add("writeback_jobs", settings->writeback_jobs, NULL, result, mem)) + errors++; + + if (settings->autocommit_blocks_set) + if (!_writecache_setting_str_list_add("autocommit_blocks", settings->autocommit_blocks, NULL, result, mem)) + errors++; + + if (settings->autocommit_time_set) + if (!_writecache_setting_str_list_add("autocommit_time", settings->autocommit_time, NULL, result, mem)) + errors++; + + if (settings->fua_set) + if (!_writecache_setting_str_list_add("fua", (uint64_t)settings->fua, NULL, result, mem)) + errors++; + + if (settings->nofua_set) + if (!_writecache_setting_str_list_add("nofua", (uint64_t)settings->nofua, NULL, result, mem)) + errors++; + + if (settings->cleaner_set && settings->cleaner) + if (!_writecache_setting_str_list_add("cleaner", (uint64_t)settings->cleaner, NULL, result, mem)) + errors++; + + if (settings->max_age_set) + if (!_writecache_setting_str_list_add("max_age", (uint64_t)settings->max_age, NULL, result, mem)) + errors++; + + if (settings->new_key && settings->new_val) + if (!_writecache_setting_str_list_add(settings->new_key, 0, settings->new_val, result, mem)) + errors++; + + if (errors) + log_warn("Failed to create list of writecache settings."); + 
return 1; } diff --git a/lib/report/report.c b/lib/report/report.c index 170df69..979cbee 100644 --- a/lib/report/report.c +++ b/lib/report/report.c @@ -1430,6 +1430,16 @@ static int _cache_settings_disp(struct dm_report *rh, struct dm_pool *mem, struct _str_list_append_baton baton; struct dm_list dummy_list; /* dummy list to display "nothing" */ + if (seg_is_writecache(seg)) { + if (!(result = str_list_create(mem))) + return_0; + + if (!writecache_settings_to_str_list((struct writecache_settings *)&seg->writecache_settings, result, mem)) + return_0; + + return _field_set_string_list(rh, field, result, private, 0, NULL); + } + if (seg_is_cache(seg) && lv_is_cache_vol(seg->pool_lv)) setting_seg = seg; @@ -3802,6 +3812,12 @@ static int _lvhealthstatus_disp(struct dm_report *rh, struct dm_pool *mem, health = "failed"; else if (lvdm->seg_status.cache->read_only) health = "metadata_read_only"; + } else if (lv_is_writecache(lv) && (lvdm->seg_status.type != SEG_STATUS_NONE)) { + if (lvdm->seg_status.type != SEG_STATUS_WRITECACHE) + return _field_set_value(field, GET_FIRST_RESERVED_NAME(health_undef), + GET_FIELD_RESERVED_VALUE(health_undef)); + if (lvdm->seg_status.writecache->error) + health = "error"; } else if (lv_is_thin_pool(lv) && (lvdm->seg_status.type != SEG_STATUS_NONE)) { if (lvdm->seg_status.type != SEG_STATUS_THIN_POOL) return _field_set_value(field, GET_FIRST_RESERVED_NAME(health_undef), @@ -3945,7 +3961,7 @@ static int _vdo_ ## vdo_field_name ## _disp (struct dm_report *rh, struct dm_poo if (!seg_is_vdo_pool(seg)) \ return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_64)); \ \ - size = seg->vdo_params.vdo_field_name ## _mb * (1024 * 1024 >> SECTOR_SHIFT); \ + size = seg->vdo_params.vdo_field_name ## _mb * (UINT64_C(1024) * 1024 >> SECTOR_SHIFT); \ \ return _size64_disp(rh, mem, field, &size, private);\ } diff --git a/lib/writecache/writecache.c b/lib/writecache/writecache.c index 130922a..c7aea28 100644 --- a/lib/writecache/writecache.c +++ 
b/lib/writecache/writecache.c @@ -26,6 +26,9 @@ #include "lib/metadata/lv_alloc.h" #include "lib/config/defaults.h" +static int _writecache_cleaner_supported; +static int _writecache_max_age_supported; + #define SEG_LOG_ERROR(t, p...) \ log_error(t " segment %s of logical volume %s.", ## p, \ dm_config_parent_name(sn), seg->lv->name), 0; @@ -120,6 +123,18 @@ static int _writecache_text_import(struct lv_segment *seg, seg->writecache_settings.nofua_set = 1; } + if (dm_config_has_node(sn, "cleaner")) { + if (!dm_config_get_uint32(sn, "cleaner", &seg->writecache_settings.cleaner)) + return SEG_LOG_ERROR("Unknown writecache_setting in"); + seg->writecache_settings.cleaner_set = 1; + } + + if (dm_config_has_node(sn, "max_age")) { + if (!dm_config_get_uint32(sn, "max_age", &seg->writecache_settings.max_age)) + return SEG_LOG_ERROR("Unknown writecache_setting in"); + seg->writecache_settings.max_age_set = 1; + } + if (dm_config_has_node(sn, "writecache_setting_key")) { const char *key; const char *val; @@ -184,6 +199,14 @@ static int _writecache_text_export(const struct lv_segment *seg, outf(f, "nofua = %u", seg->writecache_settings.nofua); } + if (seg->writecache_settings.cleaner_set && seg->writecache_settings.cleaner) { + outf(f, "cleaner = %u", seg->writecache_settings.cleaner); + } + + if (seg->writecache_settings.max_age_set) { + outf(f, "max_age = %u", seg->writecache_settings.max_age); + } + if (seg->writecache_settings.new_key && seg->writecache_settings.new_val) { outf(f, "writecache_setting_key = \"%s\"", seg->writecache_settings.new_key); @@ -208,6 +231,7 @@ static int _target_present(struct cmd_context *cmd, { static int _writecache_checked = 0; static int _writecache_present = 0; + uint32_t maj, min, patchlevel; if (!activation()) return 0; @@ -215,6 +239,19 @@ static int _target_present(struct cmd_context *cmd, if (!_writecache_checked) { _writecache_checked = 1; _writecache_present = target_present(cmd, TARGET_NAME_WRITECACHE, 1); + + if 
(!target_version(TARGET_NAME_WRITECACHE, &maj, &min, &patchlevel)) + return_0; + + if (maj < 1) { + log_error("writecache target version older than minimum 1.0.0"); + return 0; + } + + if (min >= 2) { + _writecache_cleaner_supported = 1; + _writecache_max_age_supported = 1; + } } return _writecache_present; @@ -257,6 +294,18 @@ static int _writecache_add_target_line(struct dev_manager *dm, return 0; } + if (!_writecache_cleaner_supported && seg->writecache_settings.cleaner_set && seg->writecache_settings.cleaner) { + log_warn("WARNING: ignoring writecache setting \"cleaner\" which is not supported by kernel for LV %s.", seg->lv->name); + seg->writecache_settings.cleaner = 0; + seg->writecache_settings.cleaner_set = 0; + } + + if (!_writecache_max_age_supported && seg->writecache_settings.max_age_set) { + log_warn("WARNING: ignoring writecache setting \"max_age\" which is not supported by kernel for LV %s.", seg->lv->name); + seg->writecache_settings.max_age = 0; + seg->writecache_settings.max_age_set = 0; + } + if ((pmem = lv_on_pmem(seg->writecache)) < 0) return_0; diff --git a/man/lvconvert.8_pregen b/man/lvconvert.8_pregen index 7440984..b3902a5 100644 --- a/man/lvconvert.8_pregen +++ b/man/lvconvert.8_pregen @@ -23,6 +23,10 @@ lvconvert - Change logical volume layout .ad b .br .ad l + \fB--cachedevice\fP \fIPV\fP +.ad b +.br +.ad l \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP .ad b .br @@ -43,6 +47,10 @@ lvconvert - Change logical volume layout .ad b .br .ad l + \fB--cachesize\fP \fISize\fP[m|UNIT] +.ad b +.br +.ad l \fB--cachevol\fP \fILV\fP .ad b .br @@ -738,6 +746,44 @@ Attach a cache to an LV, converts the LV to type cache. .br - +Add a writecache to an LV, using a specified cache device. 
+.br +.P +\fBlvconvert\fP \fB--type\fP \fBwritecache\fP \fB--cachedevice\fP \fIPV\fP \fILV\fP\fI_linear_striped_raid\fP +.br +.RS 4 +.ad l +[ \fB--cachesize\fP \fISize\fP[m|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +- + +Add a cache to an LV, using a specified cache device. +.br +.P +\fBlvconvert\fP \fB--type\fP \fBcache\fP \fB--cachedevice\fP \fIPV\fP \fILV\fP\fI_linear_striped_thinpool_raid\fP +.br +.RS 4 +.ad l +[ \fB--cachesize\fP \fISize\fP[m|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +- + Convert LV to type thin-pool. .br .P @@ -1135,6 +1181,12 @@ See \fBlvmcache\fP(7) for more information about LVM caching. .ad b .HP .ad l +\fB--cachedevice\fP \fIPV\fP +.br +The name of a device to use for a cache. +.ad b +.HP +.ad l \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP .br Specifies the cache metadata format used by cache target. @@ -1182,6 +1234,12 @@ See \fBlvmcache\fP(7) for more information. .ad b .HP .ad l +\fB--cachesize\fP \fISize\fP[m|UNIT] +.br +The size of cache to use. +.ad b +.HP +.ad l \fB--cachevol\fP \fILV\fP .br The name of a cache volume. diff --git a/man/lvcreate.8_pregen b/man/lvcreate.8_pregen index be8e783..ee69034 100644 --- a/man/lvcreate.8_pregen +++ b/man/lvcreate.8_pregen @@ -31,6 +31,10 @@ lvcreate - Create a logical volume .ad b .br .ad l + \fB--cachedevice\fP \fIPV\fP +.ad b +.br +.ad l \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP .ad b .br @@ -51,6 +55,14 @@ lvcreate - Create a logical volume .ad b .br .ad l + \fB--cachesize\fP \fISize\fP[m|UNIT] +.ad b +.br +.ad l + \fB--cachevol\fP \fILV\fP +.ad b +.br +.ad l \fB-c\fP|\fB--chunksize\fP \fISize\fP[k|UNIT] .ad b .br @@ -816,11 +828,9 @@ where the new thin pool is named by the --thinpool arg. 
.RE - -Create a cache LV, first creating a new origin LV, +Create a new LV, then attach the specified cachepool .br -then combining it with the existing cache pool named -.br -by the --cachepool arg. +which converts the new LV to type cache. .br .P \fBlvcreate\fP \fB--type\fP \fBcache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] @@ -881,6 +891,190 @@ by the --cachepool arg. .RE - +Create a new LV, then attach the specified cachevol +.br +which converts the new LV to type cache. +.br +.P +\fBlvcreate\fP \fB--type\fP \fBcache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] +.RS 5 + \fB--cachevol\fP \fILV\fP \fIVG\fP +.RE +.br +.RS 4 +.ad l +[ \fB-l\fP|\fB--extents\fP \fINumber\fP[PERCENT] ] +.ad b +.br +.ad l +[ \fB-c\fP|\fB--chunksize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB-i\fP|\fB--stripes\fP \fINumber\fP ] +.ad b +.br +.ad l +[ \fB-I\fP|\fB--stripesize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachemode\fP \fBwritethrough\fP|\fBwriteback\fP|\fBpassthrough\fP ] +.ad b +.br +.ad l +[ \fB--cachepolicy\fP \fIString\fP ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +.ad l +[ \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +.RS 4 +[ \fIPV\fP ... ] +.RE +- + +Create a new LV, then attach a cachevol created from +.br +the specified cache device, which converts the +.br +new LV to type cache. 
+.br +.P +\fBlvcreate\fP \fB--type\fP \fBcache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] +.RS 5 + \fB--cachedevice\fP \fIPV\fP \fIVG\fP +.RE +.br +.RS 4 +.ad l +[ \fB-l\fP|\fB--extents\fP \fINumber\fP[PERCENT] ] +.ad b +.br +.ad l +[ \fB-c\fP|\fB--chunksize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB-i\fP|\fB--stripes\fP \fINumber\fP ] +.ad b +.br +.ad l +[ \fB-I\fP|\fB--stripesize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachemode\fP \fBwritethrough\fP|\fBwriteback\fP|\fBpassthrough\fP ] +.ad b +.br +.ad l +[ \fB--cachepolicy\fP \fIString\fP ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +.ad l +[ \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP ] +.ad b +.br +.ad l +[ \fB--cachesize\fP \fISize\fP[m|UNIT] ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +.RS 4 +[ \fIPV\fP ... ] +.RE +- + +Create a new LV, then attach the specified cachevol +.br +which converts the new LV to type writecache. +.br +.P +\fBlvcreate\fP \fB--type\fP \fBwritecache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] +.RS 5 + \fB--cachevol\fP \fILV\fP \fIVG\fP +.RE +.br +.RS 4 +.ad l +[ \fB-l\fP|\fB--extents\fP \fINumber\fP[PERCENT] ] +.ad b +.br +.ad l +[ \fB-i\fP|\fB--stripes\fP \fINumber\fP ] +.ad b +.br +.ad l +[ \fB-I\fP|\fB--stripesize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +.RS 4 +[ \fIPV\fP ... ] +.RE +- + +Create a new LV, then attach a cachevol created from +.br +the specified cache device, which converts the +.br +new LV to type writecache. 
+.br +.P +\fBlvcreate\fP \fB--type\fP \fBwritecache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] +.RS 5 + \fB--cachedevice\fP \fIPV\fP \fIVG\fP +.RE +.br +.RS 4 +.ad l +[ \fB-l\fP|\fB--extents\fP \fINumber\fP[PERCENT] ] +.ad b +.br +.ad l +[ \fB-i\fP|\fB--stripes\fP \fINumber\fP ] +.ad b +.br +.ad l +[ \fB-I\fP|\fB--stripesize\fP \fISize\fP[k|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachesize\fP \fISize\fP[m|UNIT] ] +.ad b +.br +.ad l +[ \fB--cachesettings\fP \fIString\fP ] +.ad b +.br +[ COMMON_OPTIONS ] +.RE +.br +.RS 4 +[ \fIPV\fP ... ] +.RE +- + Common options for command: . .RS 4 @@ -1091,6 +1285,12 @@ See \fBlvmcache\fP(7) for more information about LVM caching. .ad b .HP .ad l +\fB--cachedevice\fP \fIPV\fP +.br +The name of a device to use for a cache. +.ad b +.HP +.ad l \fB--cachemetadataformat\fP \fBauto\fP|\fB1\fP|\fB2\fP .br Specifies the cache metadata format used by cache target. @@ -1138,6 +1338,18 @@ See \fBlvmcache\fP(7) for more information. .ad b .HP .ad l +\fB--cachesize\fP \fISize\fP[m|UNIT] +.br +The size of cache to use. +.ad b +.HP +.ad l +\fB--cachevol\fP \fILV\fP +.br +The name of a cache volume. +.ad b +.HP +.ad l \fB-c\fP|\fB--chunksize\fP \fISize\fP[k|UNIT] .br The size of chunks in a snapshot, cache pool or thin pool. @@ -2659,11 +2871,11 @@ config setting sparse_segtype_default. .RE - -Create a cache LV, first creating a new origin LV, +Create a new LV, then attach the specified cachepool .br -then combining it with the existing cache pool named +which converts the new LV to type cache .br -by the --cachepool arg (variant, infers --type cache). +(variant, infers --type cache.) .br .P \fBlvcreate\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] \fB--cachepool\fP \fILV\fP\fI_cachepool\fP \fIVG\fP @@ -2717,11 +2929,11 @@ by the --cachepool arg (variant, infers --type cache). 
.RE - -Create a cache LV, first creating a new origin LV, +Create a new LV, then attach the specified cachepool .br -then combining it with the existing cache pool named +which converts the new LV to type cache. .br -in the first arg (variant, also use --cachepool). +(variant, also use --cachepool). .br .P \fBlvcreate\fP \fB--type\fP \fBcache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] \fILV\fP\fI_cachepool\fP @@ -2779,19 +2991,15 @@ in the first arg (variant, also use --cachepool). .RE - -When LV is a cache pool, create a cache LV, -.br -first creating a new origin LV, then combining it with -.br -the existing cache pool named in the first arg +When the LV arg is a cachepool, then create a new LV and .br -(variant, infers --type cache, also use --cachepool). +attach the cachepool arg to it. .br -When LV is not a cache pool, convert the specified LV +(variant, use --type cache and --cachepool.) .br -to type cache after creating a new cache pool LV to use +When the LV arg is not a cachepool, then create a new cachepool .br -(use lvconvert). +and attach it to the LV arg (alternative, use lvconvert.) .br .P \fBlvcreate\fP \fB-H\fP|\fB--cache\fP \fB-L\fP|\fB--size\fP \fISize\fP[m|UNIT] \fILV\fP diff --git a/man/lvmcache.7_main b/man/lvmcache.7_main index 425904e..37d0e33 100644 --- a/man/lvmcache.7_main +++ b/man/lvmcache.7_main @@ -34,8 +34,6 @@ LVM refers to this using the LV type \fBwritecache\fP. .SH USAGE -Both kinds of caching use similar lvm commands: - .B 1. Identify main LV that needs caching The main LV may already exist, and is located on larger, slower devices. @@ -131,8 +129,35 @@ attached. LV VG Attr Type Devices fast vg -wi------- linear /dev/fast_ssd main vg -wi------- linear /dev/slow_hhd + +To stop caching the main LV and also remove unneeded cache pool, +use the --uncache: + +.nf + $ lvconvert --uncache vg/main + + $ lvs -a + LV VG Attr Type Devices + main vg -wi------- linear /dev/slow_hhd + .fi +.SS Create a new LV with caching. 
+ +A new LV can be created with caching attached at the time of creation +using the following command: + +.nf +$ lvcreate --type cache|writecache -n Name -L Size + --cachedevice /dev/fast_ssd vg /dev/slow_hhd +.fi + +The main LV is created with the specified Name and Size from the slow_hhd. +A hidden fast LV is created on the fast_ssd and is then attached to the +new main LV. If the fast_ssd is unused, the entire disk will be used as +the cache unless the --cachesize option is used to specify a size for the +fast LV. The --cachedevice option can be repeated to use multiple disks +for the fast LV. .SH OPTIONS @@ -156,12 +181,26 @@ same fast LV. This option can be used with dm-writecache or dm-cache. Pass this option a cachepool LV or a standard LV. When using a cache pool, lvm places cache data and cache metadata on different LVs. The two -LVs together are called a cache pool. This permits specific placement of -data and metadata. A cache pool is represented as a special type of LV +LVs together are called a cache pool. This has a bit better performance +for dm-cache and permits specific placement and segment type selection +for data and metadata volumes. +A cache pool is represented as a special type of LV that cannot be used directly. If a standard LV is passed with this option, lvm will first convert it to a cache pool by combining it with another LV to use for metadata. This option can be used with dm-cache. +.B --cachedevice +.I PV +.br + +This option can be used in place of --cachevol, in which case a cachevol +LV will be created using the specified device. This option can be +repeated to create a cachevol using multiple devices, or a tag name can be +specified in which case the cachevol will be created using any of the +devices with the given tag. If a named cache device is unused, the entire +device will be used to create the cachevol. To create a cachevol of a +specific size from the cache devices, include the --cachesize option. 
+
 \&
 
 .SS dm-cache block size
@@ -335,11 +374,16 @@ $ lvconvert --type cache --cachevol fast \\
 
 The size of data blocks managed by dm-cache can be specified with the
 --chunksize option when caching is started. The default unit is KiB. The
-value must be a multiple of 32KiB between 32KiB and 1GiB.
+value must be a multiple of 32KiB between 32KiB and 1GiB. Cache chunks
+bigger than 512KiB shall be only used when necessary.
 
 Using a chunk size that is too large can result in wasteful use of the
 cache, in which small reads and writes cause large sections of an LV to be
-stored in the cache. However, choosing a chunk size that is too small
+stored in the cache. It can also require increasing migration threshold
+which defaults to 2048 sectors (1 MiB). Lvm2 ensures migration threshold is
+at least 8 chunks in size. This may in some cases result in very
+high bandwidth load of transferring data between the cache LV and its
+cache origin LV. However, choosing a chunk size that is too small
 can result in more overhead trying to manage the numerous chunks that
 become mapped into the cache. Overhead can include both excessive CPU
 time searching for chunks, and excessive memory tracking chunks.
@@ -357,6 +401,35 @@ The default value is shown by:
 .br
 .B lvmconfig --type default allocation/cache_pool_chunk_size
 
+Checking migration threshold (in sectors) of running cached LV:
+.br
+.B lvs -o+kernel_cache_settings VG/LV
+
+
+.SS dm-cache migration threshold
+
+\&
+
+Migrating data between the origin and cache LV uses bandwidth.
+The user can set a throttle to prevent more than a certain amount of
+migration occurring at any one time. Currently dm-cache is not taking any
+account of normal io traffic going to the devices.
+
+User can set migration threshold via cache policy settings as
+"migration_threshold=<#sectors>" to set the maximum number
+of sectors being migrated, the default being 2048 sectors (1MiB).
+
+Command to set migration threshold to 2MiB (4096 sectors):
+.br
+.B lvcreate --cachepolicy 'migration_threshold=4096' VG/LV
+
+
+Command to display the migration threshold:
+.br
+.B lvs -o+kernel_cache_settings,cache_settings VG/LV
+.br
+.B lvs -o+chunksize VG/LV
+
 .SS dm-cache cache policy
diff --git a/man/lvs.8_end b/man/lvs.8_end
index 6efc9cb..5a4ecc8 100644
--- a/man/lvs.8_end
+++ b/man/lvs.8_end
@@ -74,5 +74,9 @@ Related to Thin Logical Volumes: (F)ailed.
 .br
 (F)ailed is set when related thin pool enters Failed state and no further I/O
 is permitted at all.
+.IP
+Related to writecache logical volumes: (E)rror.
+.br
+(E)rror is set when dm-writecache reports an error.
 .IP 10 3
 s(k)ip activation: this volume is flagged to be skipped during activation.
diff --git a/man/lvs.8_pregen b/man/lvs.8_pregen
index 8c3091d..8aea356 100644
--- a/man/lvs.8_pregen
+++ b/man/lvs.8_pregen
@@ -577,6 +577,10 @@ Related to Thin Logical Volumes: (F)ailed.
 .br
 (F)ailed is set when related thin pool enters Failed state and no further I/O
 is permitted at all.
+.IP
+Related to writecache logical volumes: (E)rror.
+.br
+(E)rror is set when dm-writecache reports an error.
 .IP 10 3
 s(k)ip activation: this volume is flagged to be skipped during activation.
 .SH SEE ALSO
diff --git a/man/vgck.8_pregen b/man/vgck.8_pregen
index a66de5d..2a1ec23 100644
@@ -199,6 +199,15 @@ back metadata it believes has changed but hasn't.
 \fB--updatemetadata\fP
 .br
 Update VG metadata to correct problems.
+If VG metadata was updated while a PV was missing, and the PV
+reappears with an old version of metadata, then this option
+(or any other command that writes metadata) will update the
+metadata on the previously missing PV. If a PV was removed
+from a VG while it was missing, and the PV reappears, using
+this option will clear the outdated metadata from the previously
+missing PV. 
If metadata text is damaged on one PV, using this +option will replace the damaged metadata text. For more severe +damage, e.g. with headers, see \fBpvck\fP(8). .ad b .HP .ad l diff --git a/scripts/blkdeactivate.sh.in b/scripts/blkdeactivate.sh.in index 57b3e58..7c517b8 100644 --- a/scripts/blkdeactivate.sh.in +++ b/scripts/blkdeactivate.sh.in @@ -330,6 +330,12 @@ deactivate_vdo() { test -b "$DEV_DIR/mapper/$xname" || return 0 test -z "${SKIP_DEVICE_LIST["$kname"]}" || return 1 + # Skip VDO device deactivation if VDO tools missing. + test "$VDO_AVAILABLE" -eq 0 && { + add_device_to_skip_list + return 1 + } + deactivate_holders "$DEV_DIR/mapper/$xname" || return 1 echo -n " [VDO]: deactivating VDO volume $xname... " diff --git a/test/dbus/lvmdbustest.py b/test/dbus/lvmdbustest.py index b2986bf..473bb94 100755 --- a/test/dbus/lvmdbustest.py +++ b/test/dbus/lvmdbustest.py @@ -1558,6 +1558,36 @@ class TestDbusService(unittest.TestCase): cached_lv.Lv.Rename(dbus.String(new_name), dbus.Int32(g_tmo), EOD)) verify_cache_lv_count() + def test_writecache_lv(self): + vg = self._vg_create().Vg + data_lv = self._create_lv(size=mib(16), vg=vg) + cache_lv = self._create_lv(size=mib(16), vg=vg) + + # both LVs need to be inactive + self.handle_return(data_lv.Lv.Deactivate( + dbus.UInt64(0), dbus.Int32(g_tmo), EOD)) + data_lv.update() + self.handle_return(cache_lv.Lv.Deactivate( + dbus.UInt64(0), dbus.Int32(g_tmo), EOD)) + cache_lv.update() + + cached_lv_path = self.handle_return( + cache_lv.Lv.WriteCacheLv( + dbus.ObjectPath(data_lv.object_path), + dbus.Int32(g_tmo), + EOD)) + + intf = (LV_COMMON_INT, LV_INT, CACHE_LV_INT) + cached_lv = ClientProxy(self.bus, cached_lv_path, interfaces=intf) + self.assertEqual(cached_lv.LvCommon.SegType, ["writecache"]) + + uncached_lv_path = self.handle_return( + cached_lv.CachedLv.DetachCachePool( + dbus.Boolean(True), + dbus.Int32(g_tmo), + EOD)) + self.assertTrue('/com/redhat/lvmdbus1/Lv' in uncached_lv_path) + def test_vg_change(self): vg_proxy 
= self._vg_create() diff --git a/test/lib/aux.sh b/test/lib/aux.sh index e40da95..17e7935 100644 --- a/test/lib/aux.sh +++ b/test/lib/aux.sh @@ -1234,6 +1234,7 @@ activation/verify_udev_operations = $LVM_VERIFY_UDEV activation/raid_region_size = 512 allocation/wipe_signatures_when_zeroing_new_lvs = 0 allocation/vdo_slab_size_mb = 128 +allocation/zero_metadata = 0 backup/archive = 0 backup/backup = 0 devices/cache_dir = "$TESTDIR/etc" diff --git a/test/shell/cachevol-cachedevice.sh b/test/shell/cachevol-cachedevice.sh new file mode 100644 index 0000000..3831ee9 --- /dev/null +++ b/test/shell/cachevol-cachedevice.sh @@ -0,0 +1,222 @@ +#!/usr/bin/env bash + +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +SKIP_WITH_LVMPOLLD=1 + +. 
lib/inittest + +aux have_cache 1 10 0 || skip +aux have_writecache 1 0 0 || skip + +aux prepare_devs 4 64 + +vgcreate $SHARED $vg "$dev1" "$dev2" + +## cache + +# use existing cachevol +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvcreate --type cache -n $lv2 -L40M --cachevol $lv1 -y $vg "$dev2" +check lv_field $vg/$lv2 segtype cache +check lv_field $vg/${lv1}_cvol segtype linear -a +lvremove -y $vg/$lv2 + +# use entire cachedevice for cachevol +lvcreate --type cache -n $lv2 -L40M --cachedevice "$dev1" -y $vg "$dev2" +check lv_field $vg/$lv2 segtype cache +check lv_field $vg/${lv2}_cache_cvol segtype linear -a +lvremove -y $vg/$lv2 + +# use part of cachedevice for cachevol +lvcreate --type cache -n $lv2 -L20M --cachedevice "$dev1" --cachesize 16M -y $vg "$dev2" +check lv_field $vg/$lv2 segtype cache +check lv_field $vg/${lv2}_cache_cvol segtype linear -a +lvcreate --type cache -n $lv3 -L20M --cachedevice "$dev1" --cachesize 16M -y $vg "$dev2" +check lv_field $vg/$lv3 segtype cache +check lv_field $vg/${lv3}_cache_cvol segtype linear -a +lvremove -y $vg/$lv2 +lvremove -y $vg/$lv3 + +## writecache + +# use existing cachevol +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvcreate --type writecache -n $lv2 -L40M --cachevol $lv1 -y $vg "$dev2" +check lv_field $vg/$lv2 segtype writecache +check lv_field $vg/${lv1}_cvol segtype linear -a +lvremove -y $vg/$lv2 + +# use entire cachedevice for cachevol +lvcreate --type writecache -n $lv2 -L40M --cachedevice "$dev1" -y $vg "$dev2" +check lv_field $vg/$lv2 segtype writecache +check lv_field $vg/${lv2}_cache_cvol segtype linear -a +lvremove -y $vg/$lv2 + +# use part of cachedevice for cachevol +lvcreate --type writecache -n $lv2 -L20M --cachedevice "$dev1" --cachesize 16M -y $vg "$dev2" +check lv_field $vg/$lv2 segtype writecache +check lv_field $vg/${lv2}_cache_cvol segtype linear -a +lvcreate --type writecache -n $lv3 -L20M --cachedevice "$dev1" --cachesize 16M -y $vg "$dev2" +check lv_field $vg/$lv3 segtype writecache +check lv_field 
$vg/${lv3}_cache_cvol segtype linear -a +lvremove -y $vg/$lv2 +lvremove -y $vg/$lv3 + +## multiple cachedevs + +vgextend $vg "$dev3" "$dev4" + +lvcreate --type writecache -n $lv2 -L100M --cachedevice "$dev1" --cachedevice "$dev3" -y $vg "$dev2" "$dev4" +check lv_field $vg/${lv2}_cache_cvol lv_size "120.00m" +lvremove -y $vg/$lv2 + +lvcreate --type writecache -n $lv2 -L100M --cachedevice "$dev1" --cachedevice "$dev3" --cachesize 80M -y $vg "$dev2" "$dev4" +check lv_field $vg/${lv2}_cache_cvol lv_size "80.00m" +lvremove -y $vg/$lv2 + +pvchange --addtag slow "$dev2" +pvchange --addtag slow "$dev4" +pvchange --addtag fast "$dev1" +pvchange --addtag fast "$dev3" + +lvcreate --type writecache -n $lv2 -L100M --cachedevice @fast --cachesize 80M -y $vg @slow +check lv_field $vg/${lv2}_cache_cvol lv_size "80.00m" +lvremove -y $vg/$lv2 + +lvcreate --type cache -n $lv2 -L100M --cachedevice @fast --cachesize 80M -y $vg @slow +check lv_field $vg/${lv2}_cache_cvol lv_size "80.00m" +lvremove -y $vg/$lv2 + +## error cases + +# cachevol doesn't exist +not lvcreate --type cache -n $lv2 -l8 --cachevol asdf -y $vg "$dev2" +not lvs $vg/$lv1 +not lvs $vg/$lv2 + +# cachedevice doesn't exist +not lvcreate --type cache -n $lv2 -l8 --cachedevice asdf -y $vg "$dev2" +not lvs $vg/$lv1 +not lvs $vg/$lv2 + +# cachevol doesn't exist +not lvcreate --type writecache -n $lv2 -l8 --cachevol asdf -y $vg "$dev2" +not lvs $vg/$lv1 +not lvs $vg/$lv2 + +# cachedevice doesn't exist +not lvcreate --type writecache -n $lv2 -l8 --cachedevice asdf -y $vg "$dev2" +not lvs $vg/$lv1 +not lvs $vg/$lv2 + +# when cachedevice is already being used, cachesize is required to use a part of it +lvcreate -n asdf -l1 $vg "$dev1" +not lvcreate --type writecache -n $lv2 -l8 --cachedevice "$dev1" -y $vg "$dev2" +not lvcreate --type writecache -n $lv2 -l8 --cachedevice "$dev1" --cachedevice "$dev3" -y $vg "$dev2" +not lvs $vg/$lv1 +not lvs $vg/$lv2 +lvcreate --type writecache -n $lv2 -l8 --cachedevice "$dev1" --cachesize 8M -y 
$vg "$dev2" +lvs $vg/$lv2 +check lv_field $vg/${lv2}_cache_cvol lv_size "8.00m" +lvremove -y $vg/$lv2 + +vgremove -ff $vg + +# lvconvert single step cachevol creation and attachment +# . cache and writecache +# . one or two cachedevices +# . with or without --cachesize +# . using tags for devices + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" "$dev4" + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type cache --cachedevice "$dev2" $vg/$lv1 +check lv_field $vg/$lv1 segtype cache +check lv_field $vg/${lv1}_cache_cvol segtype linear -a +check lv_field $vg/${lv1}_cache_cvol lv_size "60.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type cache --cachedevice "$dev2" --cachedevice "$dev3" $vg/$lv1 +check lv_field $vg/$lv1 segtype cache +check lv_field $vg/${lv1}_cache_cvol lv_size "120.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type cache --cachedevice "$dev2" --cachedevice "$dev3" --cachesize 8M $vg/$lv1 +check lv_field $vg/$lv1 segtype cache +check lv_field $vg/${lv1}_cache_cvol lv_size "8.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type writecache --cachedevice "$dev2" $vg/$lv1 +check lv_field $vg/$lv1 segtype writecache +check lv_field $vg/${lv1}_cache_cvol lv_size "60.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type writecache --cachedevice "$dev2" --cachedevice "$dev3" $vg/$lv1 +check lv_field $vg/$lv1 segtype writecache +check lv_field $vg/${lv1}_cache_cvol lv_size "120.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg "$dev1" +lvconvert -y --type writecache --cachedevice "$dev2" --cachedevice "$dev3" --cachesize 8M $vg/$lv1 +check lv_field $vg/$lv1 segtype writecache +check lv_field 
$vg/${lv1}_cache_cvol lv_size "8.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +pvchange --addtag slow "$dev1" +pvchange --addtag fast "$dev2" +pvchange --addtag fast "$dev3" + +lvcreate -n $lv1 -l8 -an $vg @slow +lvconvert -y --type cache --cachedevice @fast --cachesize 8M $vg/$lv1 +check lv_field $vg/$lv1 segtype cache +check lv_field $vg/${lv1}_cache_cvol lv_size "8.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +lvcreate -n $lv1 -l8 -an $vg @slow +lvconvert -y --type writecache --cachedevice @fast --cachesize 8M $vg/$lv1 +check lv_field $vg/$lv1 segtype writecache +check lv_field $vg/${lv1}_cache_cvol lv_size "8.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# if the cache name is used generate a new name +lvcreate -n $lv1 -l8 -an $vg @slow +lvcreate -n ${lv1}_cache -l1 -an $vg @slow +lvconvert -y --type writecache --cachedevice @fast --cachesize 8M $vg/$lv1 +check lv_field $vg/$lv1 segtype writecache +check lv_field $vg/${lv1}_cache0_cvol lv_size "8.00m" +lvchange -ay $vg/$lv1 +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +vgremove -ff $vg + diff --git a/test/shell/integrity-blocksize-2.sh b/test/shell/integrity-blocksize-2.sh new file mode 100644 index 0000000..5e0fd9a --- /dev/null +++ b/test/shell/integrity-blocksize-2.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash + +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +SKIP_WITH_LVMPOLLD=1 + +. 
lib/inittest + +aux have_integrity 1 5 0 || skip + +mnt="mnt" +mkdir -p $mnt + +_sync_percent() { + local checklv=$1 + get lv_field "$checklv" sync_percent | cut -d. -f1 +} + +_wait_recalc() { + local checklv=$1 + + for i in $(seq 1 10) ; do + sync=$(_sync_percent "$checklv") + echo "sync_percent is $sync" + + if test "$sync" = "100"; then + return + fi + + sleep 1 + done + + # TODO: There is some strange bug, first leg of RAID with integrity + # enabled never gets in sync. I saw this in BB, but not when executing + # the commands manually + if test -z "$sync"; then + echo "TEST WARNING: Resync of dm-integrity device '$checklv' failed" + dmsetup status "$DM_DEV_DIR/mapper/${checklv/\//-}" + exit + fi + echo "timeout waiting for recalc" + return 1 +} + +# prepare_devs uses ramdisk backing which has 512 LBS and 4K PBS +# This should cause mkfs.xfs to use 4K sector size, +# and integrity to use 4K block size +aux prepare_devs 2 64 + +vgcreate $vg "$dev1" "$dev2" +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# add integrity while LV is inactive +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvconvert --raidintegrity y $vg/$lv1 +lvchange -ay $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs unmounted +# lvconvert will use ribs 512 to avoid increasing LBS from 512 to 4k on active LV +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvchange -ay $vg +lvconvert 
--raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs mounted +# lvconvert will use ribs 512 to avoid increasing LBS from 512 to 4k on active LV +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +vgremove -ff $vg + diff --git a/test/shell/integrity-blocksize-3.sh b/test/shell/integrity-blocksize-3.sh new file mode 100644 index 0000000..4aea972 --- /dev/null +++ b/test/shell/integrity-blocksize-3.sh @@ -0,0 +1,285 @@ +#!/usr/bin/env bash + +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +SKIP_WITH_LVMPOLLD=1 + +. lib/inittest + +aux have_integrity 1 5 0 || skip + +mnt="mnt" +mkdir -p $mnt + +_sync_percent() { + local checklv=$1 + get lv_field "$checklv" sync_percent | cut -d. 
-f1 +} + +_wait_recalc() { + local checklv=$1 + + for i in $(seq 1 10) ; do + sync=$(_sync_percent "$checklv") + echo "sync_percent is $sync" + + if test "$sync" = "100"; then + return + fi + + sleep 1 + done + + # TODO: There is some strange bug, first leg of RAID with integrity + # enabled never gets in sync. I saw this in BB, but not when executing + # the commands manually + if test -z "$sync"; then + echo "TEST WARNING: Resync of dm-integrity device '$checklv' failed" + dmsetup status "$DM_DEV_DIR/mapper/${checklv/\//-}" + exit + fi + echo "timeout waiting for recalc" + return 1 +} + +# scsi_debug devices with 512 LBS 512 PBS +aux prepare_scsi_debug_dev 256 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "512" +aux prepare_devs 2 64 + +vgcreate $vg "$dev1" "$dev2" +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# add integrity while LV is inactive +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvconvert --raidintegrity y $vg/$lv1 +lvchange -ay $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs unmounted +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvchange -ay $vg +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt 
+lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs mounted +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +vgremove -ff $vg +aux cleanup_scsi_debug_dev +sleep 1 + +# scsi_debug devices with 4K LBS and 4K PBS +aux prepare_scsi_debug_dev 256 sector_size=4096 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "4096" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "4096" +aux prepare_devs 2 64 + +vgcreate $vg "$dev1" "$dev2" +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# add integrity while LV is inactive +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvconvert --raidintegrity y $vg/$lv1 +lvchange -ay $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs unmounted +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvchange -ay $vg +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc 
$vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs mounted +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +vgremove -ff $vg +aux cleanup_scsi_debug_dev +sleep 1 + +# scsi_debug devices with 512 LBS and 4K PBS +aux prepare_scsi_debug_dev 256 sector_size=512 physblk_exp=3 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "4096" +aux prepare_devs 2 64 + +vgcreate $vg "$dev1" "$dev2" +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# add integrity while LV is inactive +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvconvert --raidintegrity y $vg/$lv1 +lvchange -ay $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs unmounted +# lvconvert will use ribs 512 to avoid increasing LBS from 512 to 4k on active LV +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f 
"$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +umount $mnt +lvchange -an $vg +lvchange -ay $vg +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +# add integrity while LV is active, fs mounted +# lvconvert will use ribs 512 to avoid increasing LBS from 512 to 4k on active LV +lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +echo "hello world" > $mnt/hello +lvconvert --raidintegrity y $vg/$lv1 +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +lvs -a -o+devices $vg +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvchange -ay $vg/$lv1 +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +cat $mnt/hello | grep "hello world" +umount $mnt +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 + +vgremove -ff $vg +aux cleanup_scsi_debug_dev +sleep 1 + diff --git a/test/shell/integrity-blocksize.sh b/test/shell/integrity-blocksize.sh index 444e3db..eb6a364 100644 --- a/test/shell/integrity-blocksize.sh +++ b/test/shell/integrity-blocksize.sh @@ -48,9 +48,24 @@ aux extend_filter "a|$LOOP4|" aux lvmconf 'devices/scan = "/dev"' +mnt="mnt" +mkdir -p $mnt + vgcreate $vg1 $LOOP1 $LOOP2 vgcreate $vg2 $LOOP3 $LOOP4 +# LOOP1/LOOP2 have LBS 512 and PBS 512 +# LOOP3/LOOP4 have LBS 4K and PBS 4K + +blockdev --getss $LOOP1 +blockdev --getpbsz $LOOP1 +blockdev --getss $LOOP2 +blockdev --getpbsz $LOOP2 +blockdev --getss $LOOP3 +blockdev --getpbsz $LOOP3 +blockdev --getss $LOOP4 +blockdev --getpbsz $LOOP4 + # lvcreate on dev512, result 512 lvcreate --type raid1 -m1 --raidintegrity y -l 8 -n $lv1 $vg1 pvck --dump metadata $LOOP1 | grep 'block_size = 512' @@ -105,7 +120,11 @@ lvremove -y $vg2/$lv1 lvcreate --type raid1 
-m1 -l 8 -n $lv1 $vg1 aux wipefs_a /dev/$vg1/$lv1 mkfs.xfs -f "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"512\" lvconvert --raidintegrity y $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"512\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP1 | grep 'block_size = 512' lvremove -y $vg1/$lv1 @@ -113,15 +132,37 @@ lvremove -y $vg1/$lv1 lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg2 aux wipefs_a /dev/$vg2/$lv1 mkfs.xfs -f "$DM_DEV_DIR/$vg2/$lv1" +blkid "$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" lvconvert --raidintegrity y $vg2/$lv1 +blkid "$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" +mount "$DM_DEV_DIR/$vg2/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP3 | grep 'block_size = 4096' lvremove -y $vg2/$lv1 -# lvconvert on dev512, ext4 1024, result 1024 +# lvconvert on dev512, ext4 1024, result 1024 (LV active when adding) +lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 +aux wipefs_a /dev/$vg1/$lv1 +mkfs.ext4 -b 1024 "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" +lvconvert --raidintegrity y $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt +pvck --dump metadata $LOOP1 | grep 'block_size = 512' +lvremove -y $vg1/$lv1 + +# lvconvert on dev512, ext4 1024, result 1024 (LV inactive when adding) lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 aux wipefs_a /dev/$vg1/$lv1 mkfs.ext4 -b 1024 "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" +lvchange -an $vg1/$lv1 lvconvert --raidintegrity y $vg1/$lv1 +lvchange -ay $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP1 | grep 'block_size = 1024' lvremove -y $vg1/$lv1 @@ -129,7 +170,11 @@ lvremove -y $vg1/$lv1 lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg2 aux wipefs_a /dev/$vg2/$lv1 mkfs.ext4 "$DM_DEV_DIR/$vg2/$lv1" +blkid 
"$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" lvconvert --raidintegrity y $vg2/$lv1 +blkid "$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" +mount "$DM_DEV_DIR/$vg2/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP3 | grep 'block_size = 4096' lvremove -y $vg2/$lv1 @@ -137,7 +182,11 @@ lvremove -y $vg2/$lv1 lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 aux wipefs_a /dev/$vg1/$lv1 mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"4096\" lvconvert --raidintegrity y --raidintegrityblocksize 512 $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"4096\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP1 | grep 'block_size = 512' lvremove -y $vg1/$lv1 @@ -145,7 +194,14 @@ lvremove -y $vg1/$lv1 lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 aux wipefs_a /dev/$vg1/$lv1 mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"4096\" +lvchange -an $vg1/$lv1 +# lv needs to be inactive to increase LBS from 512 lvconvert --raidintegrity y --raidintegrityblocksize 1024 $vg1/$lv1 +lvchange -ay $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"4096\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP1 | grep 'block_size = 1024' lvremove -y $vg1/$lv1 @@ -153,7 +209,11 @@ lvremove -y $vg1/$lv1 lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 aux wipefs_a /dev/$vg1/$lv1 mkfs.ext4 -b 1024 "$DM_DEV_DIR/$vg1/$lv1" +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" lvconvert --raidintegrity y --raidintegrityblocksize 512 $vg1/$lv1 +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"1024\" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +umount $mnt pvck --dump metadata $LOOP1 | grep 'block_size = 512' lvremove -y $vg1/$lv1 @@ -164,10 +224,48 @@ mkfs.ext4 "$DM_DEV_DIR/$vg2/$lv1" not lvconvert --raidintegrity y --raidintegrityblocksize 512 $vg2/$lv1 lvremove -y $vg2/$lv1 -# FIXME: need to use scsi_debug to create devs with LBS 512 PBS 
4k -# FIXME: lvconvert, fsunknown, LBS 512, PBS 4k: result 512 -# FIXME: lvconvert --bs 512, fsunknown, LBS 512, PBS 4k: result 512 -# FIXME: lvconvert --bs 4k, fsunknown, LBS 512, PBS 4k: result 4k +# TODO: need to use scsi_debug to create devs with LBS 512 PBS 4k +# TODO: lvconvert, fsunknown, LBS 512, PBS 4k: result 512 +# TODO: lvconvert --bs 512, fsunknown, LBS 512, PBS 4k: result 512 +# TODO: lvconvert --bs 4k, fsunknown, LBS 512, PBS 4k: result 4k + +# lvconvert on dev512, xfs 512, result 512, (detect fs with LV inactive) +lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg1 +aux wipefs_a /dev/$vg1/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg1/$lv1" +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +echo "test" > $mnt/test +umount $mnt +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"512\" +lvchange -an $vg1/$lv1 +lvconvert --raidintegrity y $vg1/$lv1 +lvchange -ay $vg1/$lv1 +mount "$DM_DEV_DIR/$vg1/$lv1" $mnt +cat $mnt/test +umount $mnt +blkid "$DM_DEV_DIR/$vg1/$lv1" | grep BLOCK_SIZE=\"512\" +pvck --dump metadata $LOOP1 | grep 'block_size = 512' +lvchange -an $vg1/$lv1 +lvremove -y $vg1/$lv1 + +# lvconvert on dev4k, xfs 4096, result 4096 (detect fs with LV inactive) +lvcreate --type raid1 -m1 -l 8 -n $lv1 $vg2 +aux wipefs_a /dev/$vg2/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg2/$lv1" +mount "$DM_DEV_DIR/$vg2/$lv1" $mnt +echo "test" > $mnt/test +umount $mnt +blkid "$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" +lvchange -an $vg2/$lv1 +lvconvert --raidintegrity y $vg2/$lv1 +lvchange -ay $vg2/$lv1 +mount "$DM_DEV_DIR/$vg2/$lv1" $mnt +cat $mnt/test +umount $mnt +blkid "$DM_DEV_DIR/$vg2/$lv1" | grep BLOCK_SIZE=\"4096\" +pvck --dump metadata $LOOP3 | grep 'block_size = 4096' +lvchange -an $vg2/$lv1 +lvremove -y $vg2/$lv1 vgremove -ff $vg1 vgremove -ff $vg2 diff --git a/test/shell/integrity-large.sh b/test/shell/integrity-large.sh index 5aba80e..06b0e03 100644 --- a/test/shell/integrity-large.sh +++ b/test/shell/integrity-large.sh @@ -23,7 +23,7 @@ mnt="mnt" mkdir -p $mnt # raid1 LV needs to be 
extended to 512MB to test imeta being exended -aux prepare_devs 4 600 +aux prepare_devs 4 632 printf "%0.sA" {1..16384} >> fileA printf "%0.sB" {1..16384} >> fileB @@ -115,7 +115,10 @@ lvcreate --type raid1 -m1 -n $lv1 -l 8 $vg lvchange -an $vg/$lv1 lvchange -ay $vg/$lv1 _add_data_to_lv +# lv needs to be inactive when adding integrity to increase LBS from 512 and get a ribs of 4k +lvchange -an $vg/$lv1 lvconvert --raidintegrity y $vg/$lv1 +lvchange -ay $vg/$lv1 _wait_recalc $vg/${lv1}_rimage_0 _wait_recalc $vg/${lv1}_rimage_1 lvs -a -o+devices $vg @@ -128,8 +131,8 @@ _verify_data_on_lv _wait_recalc $vg/${lv1}_rimage_0 _wait_recalc $vg/${lv1}_rimage_1 lvs -a -o+devices $vg -check lv_field $vg/${lv1}_rimage_0_imeta size "8.00m" -check lv_field $vg/${lv1}_rimage_1_imeta size "8.00m" +check lv_field $vg/${lv1}_rimage_0_imeta size "12.00m" +check lv_field $vg/${lv1}_rimage_1_imeta size "12.00m" # provide space to extend the images onto new devs vgextend $vg "$dev3" "$dev4" @@ -150,33 +153,35 @@ lvconvert --raidintegrity y $vg/$lv1 _wait_recalc $vg/${lv1}_rimage_0 _wait_recalc $vg/${lv1}_rimage_1 lvs -a -o+devices $vg -check lv_field $vg/${lv1}_rimage_0_imeta size "12.00m" -check lv_field $vg/${lv1}_rimage_1_imeta size "12.00m" +check lv_field $vg/${lv1}_rimage_0_imeta size "20.00m" +check lv_field $vg/${lv1}_rimage_1_imeta size "20.00m" lvchange -an $vg/$lv1 lvremove $vg/$lv1 # this succeeds because dev1,dev2 can hold rmeta+rimage lvcreate --type raid1 -n $lv1 -L 592M -an $vg "$dev1" "$dev2" +lvs -a -o+devices $vg +lvchange -an $vg/$lv1 +lvremove $vg/$lv1 # this fails because dev1,dev2 can hold rmeta+rimage, but not imeta # and we require imeta to be on same devs as rmeta/rimeta -not lvcreate --type raid1 --raidintegrity y -n $lv1 -L 592M -an $vg "$dev1" "$dev2" +not lvcreate --type raid1 --raidintegrity y -n $lv1 -L 624M -an $vg "$dev1" "$dev2" lvs -a -o+devices $vg -lvremove $vg/$lv1 # this can allocate from more devs so there's enough space for imeta to # be 
allocated in the vg, but lvcreate fails because rmeta+rimage are # allocated from dev1,dev2, we restrict imeta to being allocated on the # same devs as rmeta/rimage, and dev1,dev2 can't fit imeta. -not lvcreate --type raid1 --raidintegrity y -n $lv1 -L 592M -an $vg +not lvcreate --type raid1 --raidintegrity y -n $lv1 -L 624M -an $vg lvs -a -o+devices $vg # counterintuitively, increasing the size will allow lvcreate to succeed # because rmeta+rimage are pushed to being allocated on dev1,dev2,dev3,dev4 # which means imeta is now free to be allocated from dev3,dev4 which have # plenty of space -lvcreate --type raid1 --raidintegrity y -n $lv1 -L 600M -an $vg +lvcreate --type raid1 --raidintegrity y -n $lv1 -L 640M -an $vg lvs -a -o+devices $vg vgremove -ff $vg diff --git a/test/shell/integrity-misc.sh b/test/shell/integrity-misc.sh index 0d05689..2dae25f 100644 --- a/test/shell/integrity-misc.sh +++ b/test/shell/integrity-misc.sh @@ -95,7 +95,7 @@ _sync_percent() { get lv_field "$checklv" sync_percent | cut -d. 
-f1 } -_wait_recalc() { +_wait_sync() { local checklv=$1 for i in $(seq 1 10) ; do @@ -124,8 +124,9 @@ _wait_recalc() { # lvrename _prepare_vg lvcreate --type raid1 -m1 --raidintegrity y -n $lv1 -l 8 $vg -_wait_recalc $vg/${lv1}_rimage_0 -_wait_recalc $vg/${lv1}_rimage_1 +_wait_sync $vg/${lv1}_rimage_0 +_wait_sync $vg/${lv1}_rimage_1 +_wait_sync $vg/$lv1 _add_new_data_to_mnt umount $mnt lvrename $vg/$lv1 $vg/$lv2 @@ -141,8 +142,9 @@ vgremove -ff $vg # lv must be active _prepare_vg lvcreate --type raid1 -m1 --raidintegrity y -n $lv1 -l 8 $vg "$dev1" "$dev2" -_wait_recalc $vg/${lv1}_rimage_0 -_wait_recalc $vg/${lv1}_rimage_1 +_wait_sync $vg/${lv1}_rimage_0 +_wait_sync $vg/${lv1}_rimage_1 +_wait_sync $vg/$lv1 _add_new_data_to_mnt lvconvert --replace "$dev1" $vg/$lv1 "$dev3" lvs -a -o+devices $vg > out @@ -162,8 +164,9 @@ vgremove -ff $vg # same as prev but with bitmap mode _prepare_vg lvcreate --type raid1 -m1 --raidintegrity y --raidintegritymode bitmap -n $lv1 -l 8 $vg "$dev1" "$dev2" -_wait_recalc $vg/${lv1}_rimage_0 -_wait_recalc $vg/${lv1}_rimage_1 +_wait_sync $vg/${lv1}_rimage_0 +_wait_sync $vg/${lv1}_rimage_1 +_wait_sync $vg/$lv1 _add_new_data_to_mnt lvconvert --replace "$dev1" $vg/$lv1 "$dev3" lvs -a -o+devices $vg > out @@ -185,8 +188,9 @@ vgremove -ff $vg # (like lvconvert --replace does for a dev that's not missing). 
_prepare_vg lvcreate --type raid1 -m1 --raidintegrity y -n $lv1 -l 8 $vg "$dev1" "$dev2" -_wait_recalc $vg/${lv1}_rimage_0 -_wait_recalc $vg/${lv1}_rimage_1 +_wait_sync $vg/${lv1}_rimage_0 +_wait_sync $vg/${lv1}_rimage_1 +_wait_sync $vg/$lv1 _add_new_data_to_mnt aux disable_dev "$dev2" lvs -a -o+devices $vg > out @@ -213,8 +217,9 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid1 -m1 --raidintegrity y -n $lv1 -l 8 $vg "$dev1" "$dev2" -_wait_recalc $vg/${lv1}_rimage_0 -_wait_recalc $vg/${lv1}_rimage_1 +_wait_sync $vg/${lv1}_rimage_0 +_wait_sync $vg/${lv1}_rimage_1 +_wait_sync $vg/$lv1 _add_new_data_to_mnt umount $mnt lvchange -an $vg/$lv1 diff --git a/test/shell/integrity.sh b/test/shell/integrity.sh index 77e9430..0143129 100644 --- a/test/shell/integrity.sh +++ b/test/shell/integrity.sh @@ -78,14 +78,14 @@ _test_fs_with_error() { dd if=$mnt/fileA of=tmp bs=1k ls -l tmp stat -c %s tmp - diff fileA tmp + cmp -b fileA tmp rm tmp # read partial fileB which was corrupted not dd if=$mnt/fileB of=tmp bs=1k ls -l tmp stat -c %s tmp | grep 12288 - not diff fileB tmp + not cmp -b fileB tmp rm tmp umount $mnt @@ -118,14 +118,14 @@ _test_fs_with_raid() { dd if=$mnt/fileA of=tmp bs=1k ls -l tmp stat -c %s tmp | grep 16384 - diff fileA tmp + cmp -b fileA tmp rm tmp # read complete fileB, corruption is corrected by raid dd if=$mnt/fileB of=tmp bs=1k ls -l tmp stat -c %s tmp | grep 16384 - diff fileB tmp + cmp -b fileB tmp rm tmp umount $mnt @@ -161,15 +161,15 @@ _add_more_data_to_mnt() { } _verify_data_on_mnt() { - diff randA $mnt/randA - diff randB $mnt/randB - diff randC $mnt/randC - diff fileA $mnt/1/fileA - diff fileB $mnt/1/fileB - diff fileC $mnt/1/fileC - diff fileA $mnt/2/fileA - diff fileB $mnt/2/fileB - diff fileC $mnt/2/fileC + cmp -b randA $mnt/randA + cmp -b randB $mnt/randB + cmp -b randC $mnt/randC + cmp -b fileA $mnt/1/fileA + cmp -b fileB $mnt/1/fileB + cmp -b fileC $mnt/1/fileC + cmp -b fileA $mnt/2/fileA + cmp -b fileB $mnt/2/fileB + cmp -b fileC 
$mnt/2/fileC } _verify_data_on_lv() { @@ -221,6 +221,8 @@ _wait_recalc() { _prepare_vg lvcreate --type raid1 -m1 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 @@ -229,6 +231,9 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid1 -m2 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +_wait_recalc $vg/${lv1}_rimage_2 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 @@ -237,6 +242,9 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid4 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +_wait_recalc $vg/${lv1}_rimage_2 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 @@ -245,6 +253,9 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid5 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +_wait_recalc $vg/${lv1}_rimage_2 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 @@ -253,6 +264,11 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid6 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +_wait_recalc $vg/${lv1}_rimage_2 +_wait_recalc $vg/${lv1}_rimage_3 +_wait_recalc $vg/${lv1}_rimage_4 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 @@ -261,6 +277,10 @@ vgremove -ff $vg _prepare_vg lvcreate --type raid10 --raidintegrity y -n $lv1 -l 8 $vg +_wait_recalc $vg/${lv1}_rimage_0 +_wait_recalc $vg/${lv1}_rimage_1 +_wait_recalc $vg/${lv1}_rimage_2 +_wait_recalc $vg/${lv1}_rimage_3 _test_fs_with_raid lvchange -an $vg/$lv1 lvconvert --raidintegrity n $vg/$lv1 diff --git a/test/shell/lvconvert-m-raid1-degraded.sh b/test/shell/lvconvert-m-raid1-degraded.sh index 05c3e89..c3f7085 100644 --- 
a/test/shell/lvconvert-m-raid1-degraded.sh +++ b/test/shell/lvconvert-m-raid1-degraded.sh @@ -33,8 +33,10 @@ aux disable_dev "$dev1" vgreduce --force --removemissing $vg check raid_leg_status $vg $lv "DA" -# Conversion to 2 legs must fail on degraded 2-legged raid1 LV -not lvconvert -y -m1 $vg/$lv +# Conversion to 2 legs does nothing on degraded 2-legged raid1 LV +lvconvert -y -m1 $vg/$lv 2>&1 | tee out +grep "already has 2 images" out +# Check it remains degraded after the successful "conversion" check raid_leg_status $vg $lv "DA" # Repair has to succeed diff --git a/test/shell/lvcreate-signature-wiping.sh b/test/shell/lvcreate-signature-wiping.sh index 73fea54..18d7a2f 100644 --- a/test/shell/lvcreate-signature-wiping.sh +++ b/test/shell/lvcreate-signature-wiping.sh @@ -42,6 +42,13 @@ init_lv_ test_blkid_ || skip lvremove -f $vg/$lv1 +# Zeroing stops the command when there is a failure (write error in this case) +aux error_dev "$dev1" "$(get first_extent_sector "$dev1"):2" +not lvcreate -l1 -n $lv1 $vg 2>&1 | tee out +grep "Failed to initialize" out +aux enable_dev "$dev1" + + aux lvmconf "allocation/wipe_signatures_when_zeroing_new_lvs = 0" lvcreate -y -Zn -l1 -n $lv1 $vg 2>&1 | tee out diff --git a/test/shell/lvcreate-thin.sh b/test/shell/lvcreate-thin.sh index 9ca7f11..c073eaf 100644 --- a/test/shell/lvcreate-thin.sh +++ b/test/shell/lvcreate-thin.sh @@ -248,4 +248,25 @@ not lvcreate -s $vg/lv1 -L4M -V2G --name $vg/lv4 not lvcreate -T mirpool -L4M --alloc anywhere -m1 $vg not lvcreate --thinpool mirpool -L4M --alloc anywhere -m1 $vg + +# Check pool metadata volume is zeroed, when zero_metadata is enabled. +# 1st. 
ensure 8megs of both PVs will have some non-0 data +lvcreate -L8m -n $lv1 $vg "$dev1" +lvextend -L+8m $vg/$lv1 "$dev2" +dd if=/dev/urandom of="$DM_DEV_DIR/$vg/$lv1" bs=1M count=16 oflag=direct conv=fdatasync +lvremove -ff $vg/$lv1 + +lvcreate -l1 --poolmetadatasize 4m --conf 'allocation/zero_metadata=1' -vvvv -T $vg/pool +lvchange -an $vg +# component activation to check device was zeroed +lvchange -y -ay $vg/pool_tmeta +dd if="$DM_DEV_DIR/$vg/pool_tmeta" of=file bs=1M count=3 skip=1 iflag=direct conv=fdatasync + +md5sum -b file | tee out +# md5sum of 3M of zeros +grep d1dd210d6b1312cb342b56d02bd5e651 out +lvchange -an $vg +lvremove -ff $vg + + vgremove -ff $vg diff --git a/test/shell/writecache-blocksize.sh b/test/shell/writecache-blocksize.sh new file mode 100644 index 0000000..1300176 --- /dev/null +++ b/test/shell/writecache-blocksize.sh @@ -0,0 +1,342 @@ +#!/usr/bin/env bash + +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +# Test writecache usage + +SKIP_WITH_LVMPOLLD=1 + +. 
lib/inittest + +aux have_writecache 1 0 0 || skip +which mkfs.xfs || skip + +# Tests with fs block sizes require a libblkid version that shows BLOCK_SIZE +aux prepare_devs 1 +vgcreate $vg "$dev1" +lvcreate -n $lv1 -l8 $vg +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" +blkid "$DM_DEV_DIR/$vg/$lv1" | grep BLOCK_SIZE || skip +lvchange -an $vg +vgremove -ff $vg +aux cleanup_scsi_debug_dev + +mnt="mnt" +mkdir -p $mnt + +for i in `seq 1 16384`; do echo -n "A" >> fileA; done +for i in `seq 1 16384`; do echo -n "B" >> fileB; done +for i in `seq 1 16384`; do echo -n "C" >> fileC; done + +# generate random data +dd if=/dev/urandom of=randA bs=512K count=2 +dd if=/dev/urandom of=randB bs=512K count=3 +dd if=/dev/urandom of=randC bs=512K count=4 + +_add_new_data_to_mnt() { + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + + # add original data + cp randA $mnt + cp randB $mnt + cp randC $mnt + mkdir $mnt/1 + cp fileA $mnt/1 + cp fileB $mnt/1 + cp fileC $mnt/1 + mkdir $mnt/2 + cp fileA $mnt/2 + cp fileB $mnt/2 + cp fileC $mnt/2 + sync +} + +_add_more_data_to_mnt() { + mkdir $mnt/more + cp fileA $mnt/more + cp fileB $mnt/more + cp fileC $mnt/more + cp randA $mnt/more + cp randB $mnt/more + cp randC $mnt/more + sync +} + +_verify_data_on_mnt() { + diff randA $mnt/randA + diff randB $mnt/randB + diff randC $mnt/randC + diff fileA $mnt/1/fileA + diff fileB $mnt/1/fileB + diff fileC $mnt/1/fileC + diff fileA $mnt/2/fileA + diff fileB $mnt/2/fileB + diff fileC $mnt/2/fileC +} + +_verify_more_data_on_mnt() { + diff randA $mnt/more/randA + diff randB $mnt/more/randB + diff randC $mnt/more/randC + diff fileA $mnt/more/fileA + diff fileB $mnt/more/fileB + diff fileC $mnt/more/fileC +} + +_verify_data_on_lv() { + lvchange -ay $vg/$lv1 + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + _verify_data_on_mnt + rm $mnt/randA + rm $mnt/randB + rm $mnt/randC + rm -rf $mnt/1 + rm -rf $mnt/2 + umount $mnt + lvchange -an $vg/$lv1 +} + +# the default is brd ram devs with 512 LBS 4K PBS +aux prepare_devs 2 64 + +blockdev --getss 
"$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# lbs 512, pbs 4k, xfs 4k, wc 4k +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=4096 out +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 4096 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +# lbs 512, pbs 4k, xfs -s 512, wc 512 +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -f -s size=512 "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=512 out +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 512 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +aux cleanup_scsi_debug_dev +sleep 1 + + +# scsi_debug devices with 512 LBS 512 PBS +aux prepare_scsi_debug_dev 256 +check sysfs "$(< SCSI_DEBUG_DEV)" 
queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "512" +aux prepare_devs 2 64 + +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# lbs 512, pbs 512, xfs 512, wc 512 +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=512 out +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 512 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +# lbs 512, pbs 512, xfs -s 4096, wc 4096 +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -s size=4096 -f "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=4096 out +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 4096 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +aux 
cleanup_scsi_debug_dev +sleep 1 + + +# scsi_debug devices with 512 LBS and 4K PBS +aux prepare_scsi_debug_dev 256 sector_size=512 physblk_exp=3 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "4096" +aux prepare_devs 2 64 + +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# lbs 512, pbs 4k, xfs 4k, wc 4k +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=4096 out +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 4096 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +aux cleanup_scsi_debug_dev +sleep 1 + + +# scsi_debug devices with 4K LBS and 4K PBS +aux prepare_scsi_debug_dev 256 sector_size=4096 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "4096" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "4096" +aux prepare_devs 2 64 + +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +# lbs 4k, pbs 4k, xfs 4k, wc 4k +vgcreate $SHARED $vg "$dev1" +vgextend $vg "$dev2" +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" +lvchange -ay $vg/$lv1 +mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" |tee out +grep sectsz=4096 out +_add_new_data_to_mnt +lvconvert --yes --type writecache 
--cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" |tee out +grep 4096 out +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt +lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 +vgremove $vg + +aux cleanup_scsi_debug_dev + + diff --git a/test/shell/writecache-large.sh b/test/shell/writecache-large.sh new file mode 100644 index 0000000..b52eaf6 --- /dev/null +++ b/test/shell/writecache-large.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +# Test writecache usage + +SKIP_WITH_LVMPOLLD=1 + +. 
lib/inittest + +aux have_writecache 1 0 0 || skip +which mkfs.xfs || skip + +# scsi_debug devices with 512 LBS 512 PBS +aux prepare_scsi_debug_dev 1200 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "512" + +aux prepare_devs 2 600 +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + +mnt="mnt" +mkdir -p $mnt + +for i in `seq 1 16384`; do echo -n "A" >> fileA; done +for i in `seq 1 16384`; do echo -n "B" >> fileB; done +for i in `seq 1 16384`; do echo -n "C" >> fileC; done + +# generate random data +dd if=/dev/urandom of=randA bs=512K count=2 +dd if=/dev/urandom of=randB bs=512K count=3 +dd if=/dev/urandom of=randC bs=512K count=4 + +_add_new_data_to_mnt() { + mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" + + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + + # add original data + cp randA $mnt + cp randB $mnt + cp randC $mnt + mkdir $mnt/1 + cp fileA $mnt/1 + cp fileB $mnt/1 + cp fileC $mnt/1 + mkdir $mnt/2 + cp fileA $mnt/2 + cp fileB $mnt/2 + cp fileC $mnt/2 + sync +} + +_add_more_data_to_mnt() { + mkdir $mnt/more + cp fileA $mnt/more + cp fileB $mnt/more + cp fileC $mnt/more + cp randA $mnt/more + cp randB $mnt/more + cp randC $mnt/more + sync +} + +_verify_data_on_mnt() { + diff randA $mnt/randA + diff randB $mnt/randB + diff randC $mnt/randC + diff fileA $mnt/1/fileA + diff fileB $mnt/1/fileB + diff fileC $mnt/1/fileC + diff fileA $mnt/2/fileA + diff fileB $mnt/2/fileB + diff fileC $mnt/2/fileC +} + +_verify_more_data_on_mnt() { + diff randA $mnt/more/randA + diff randB $mnt/more/randB + diff randC $mnt/more/randC + diff fileA $mnt/more/fileA + diff fileB $mnt/more/fileB + diff fileC $mnt/more/fileC +} + +_verify_data_on_lv() { + lvchange -ay $vg/$lv1 + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + _verify_data_on_mnt + rm $mnt/randA + rm $mnt/randB + rm $mnt/randC + rm -rf $mnt/1 + rm -rf $mnt/2 + umount $mnt + lvchange -an $vg/$lv1 +} + +vgcreate $SHARED 
$vg "$dev1" +vgextend $vg "$dev2" + +# Use a large enough size so that the cleaner will not +# finish immediately when detaching, and will require +# a secondary check from command top level. + +lvcreate -n $lv1 -L 560M -an $vg "$dev1" +lvcreate -n $lv2 -L 500M -an $vg "$dev2" + +lvchange -ay $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" + +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +dmsetup table $vg-$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" + +_add_new_data_to_mnt +_add_more_data_to_mnt +_verify_data_on_mnt + +dd if=/dev/zero of=$mnt/big1 bs=1M count=100 oflag=sync +dd if=/dev/zero of=$mnt/big2 bs=1M count=100 oflag=sync +dd if=/dev/zero of=$mnt/big3 bs=1M count=100 oflag=sync +dd if=/dev/zero of=$mnt/big4 bs=1M count=100 oflag=sync + +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +dmsetup table $vg-$lv1 +_verify_data_on_mnt +_verify_more_data_on_mnt +dd if=$mnt/big4 of=/dev/null bs=1M count=100 +umount $mnt +lvchange -an $vg/$lv1 +_verify_data_on_lv +lvchange -an $vg/$lv2 +lvremove $vg/$lv1 +lvremove $vg/$lv2 + +vgremove -ff $vg + diff --git a/test/shell/writecache-split.sh b/test/shell/writecache-split.sh index 0f2dc47..e615e2a 100644 --- a/test/shell/writecache-split.sh +++ b/test/shell/writecache-split.sh @@ -20,29 +20,21 @@ mkfs_mount_umount() { lvt=$1 - lvchange -ay $vg/$lvt - mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg/$lvt" mount "$DM_DEV_DIR/$vg/$lvt" "$mount_dir" cp pattern1 "$mount_dir/pattern1" dd if=/dev/zero of="$mount_dir/zeros2M" bs=1M count=32 conv=fdatasync umount "$mount_dir" - - lvchange -an $vg/$lvt } mount_umount() { lvt=$1 - lvchange -ay $vg/$lvt - mount "$DM_DEV_DIR/$vg/$lvt" "$mount_dir" diff pattern1 "$mount_dir/pattern1" dd if="$mount_dir/zeros2M" of=/dev/null bs=1M count=32 umount "$mount_dir" - - lvchange -an $vg/$lvt } aux have_writecache 1 0 0 || skip @@ -62,18 
+54,38 @@ lvcreate -n $lv1 -l 16 -an $vg "$dev1" "$dev4" lvcreate -n $lv2 -l 4 -an $vg "$dev2" # -# split when no devs are missing +# split while inactive # lvconvert -y --type writecache --cachevol $lv2 $vg/$lv1 +lvchange -ay $vg/$lv1 mkfs_mount_umount $lv1 +lvchange -an $vg/$lv1 lvconvert --splitcache $vg/$lv1 lvs -o segtype $vg/$lv1 | grep linear lvs -o segtype $vg/$lv2 | grep linear +lvchange -ay $vg/$lv1 mount_umount $lv1 +lvchange -an $vg/$lv1 + +# +# split while active +# + +lvconvert -y --type writecache --cachevol $lv2 $vg/$lv1 + +lvchange -ay $vg/$lv1 +mkfs_mount_umount $lv1 + +lvconvert --splitcache $vg/$lv1 +lvs -o segtype $vg/$lv1 | grep linear +lvs -o segtype $vg/$lv2 | grep linear + +mount_umount $lv1 +lvchange -an $vg/$lv1 # # split while cachevol is missing @@ -81,7 +93,9 @@ mount_umount $lv1 lvconvert -y --type writecache --cachevol $lv2 $vg/$lv1 +lvchange -ay $vg/$lv1 mkfs_mount_umount $lv1 +lvchange -an $vg/$lv1 aux disable_dev "$dev2" @@ -108,7 +122,9 @@ lvcreate -n $lv2 -l 14 -an $vg "$dev2" "$dev3" lvconvert -y --type writecache --cachevol $lv2 $vg/$lv1 +lvchange -ay $vg/$lv1 mkfs_mount_umount $lv1 +lvchange -an $vg/$lv1 aux disable_dev "$dev3" diff --git a/test/shell/writecache.sh b/test/shell/writecache.sh index 8852e93..39ef319 100644 --- a/test/shell/writecache.sh +++ b/test/shell/writecache.sh @@ -19,152 +19,251 @@ SKIP_WITH_LVMPOLLD=1 aux have_writecache 1 0 0 || skip which mkfs.xfs || skip -mount_dir="mnt" -mkdir -p $mount_dir +# scsi_debug devices with 512 LBS 512 PBS +aux prepare_scsi_debug_dev 256 +check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "512" +aux prepare_devs 2 64 + +# scsi_debug devices with 512 LBS and 4K PBS +#aux prepare_scsi_debug_dev 256 sector_size=512 physblk_exp=3 +#check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size "512" +#check sysfs "$(< SCSI_DEBUG_DEV)" queue/physical_block_size "4096" +#aux prepare_devs 2 64 + +# loop devs 
with 512 LBS and 512 PBS +#dd if=/dev/zero of=loopa bs=$((1024*1024)) count=64 2> /dev/null +#dd if=/dev/zero of=loopb bs=$((1024*1024)) count=64 2> /dev/null +#LOOP1=$(losetup -f loopa --show) +#LOOP2=$(losetup -f loopb --show) +#aux extend_filter "a|$LOOP1|" +#aux extend_filter "a|$LOOP2|" +#aux lvmconf 'devices/scan = "/dev"' +#dev1=$LOOP1 +#dev2=$LOOP2 + +# loop devs with 4096 LBS and 4096 PBS +#dd if=/dev/zero of=loopa bs=$((1024*1024)) count=64 2> /dev/null +#dd if=/dev/zero of=loopb bs=$((1024*1024)) count=64 2> /dev/null +#LOOP1=$(losetup -f loopa --sector-size 4096 --show) +#LOOP2=$(losetup -f loopb --sector-size 4096 --show) +#aux extend_filter "a|$LOOP1|" +#aux extend_filter "a|$LOOP2|" +#aux lvmconf 'devices/scan = "/dev"' +#dev1=$LOOP1 +#dev2=$LOOP2 + +# the default is brd ram devs with 512 LBS 4K PBS +# aux prepare_devs 2 64 + +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" + + +mnt="mnt" +mkdir -p $mnt + +for i in `seq 1 16384`; do echo -n "A" >> fileA; done +for i in `seq 1 16384`; do echo -n "B" >> fileB; done +for i in `seq 1 16384`; do echo -n "C" >> fileC; done # generate random data -dd if=/dev/urandom of=pattern1 bs=512K count=1 +dd if=/dev/urandom of=randA bs=512K count=2 +dd if=/dev/urandom of=randB bs=512K count=3 +dd if=/dev/urandom of=randC bs=512K count=4 + +_add_new_data_to_mnt() { + mkfs.xfs -f "$DM_DEV_DIR/$vg/$lv1" + + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + + # add original data + cp randA $mnt + cp randB $mnt + cp randC $mnt + mkdir $mnt/1 + cp fileA $mnt/1 + cp fileB $mnt/1 + cp fileC $mnt/1 + mkdir $mnt/2 + cp fileA $mnt/2 + cp fileB $mnt/2 + cp fileC $mnt/2 + sync +} + +_add_more_data_to_mnt() { + mkdir $mnt/more + cp fileA $mnt/more + cp fileB $mnt/more + cp fileC $mnt/more + cp randA $mnt/more + cp randB $mnt/more + cp randC $mnt/more + sync +} + +_verify_data_on_mnt() { + diff randA $mnt/randA + diff randB $mnt/randB + diff randC $mnt/randC + diff fileA $mnt/1/fileA + 
diff fileB $mnt/1/fileB + diff fileC $mnt/1/fileC + diff fileA $mnt/2/fileA + diff fileB $mnt/2/fileB + diff fileC $mnt/2/fileC +} + +_verify_more_data_on_mnt() { + diff randA $mnt/more/randA + diff randB $mnt/more/randB + diff randC $mnt/more/randC + diff fileA $mnt/more/fileA + diff fileB $mnt/more/fileB + diff fileC $mnt/more/fileC +} + +_verify_data_on_lv() { + lvchange -ay $vg/$lv1 + mount "$DM_DEV_DIR/$vg/$lv1" $mnt + _verify_data_on_mnt + rm $mnt/randA + rm $mnt/randB + rm $mnt/randC + rm -rf $mnt/1 + rm -rf $mnt/2 + umount $mnt + lvchange -an $vg/$lv1 +} -aux prepare_devs 2 64 vgcreate $SHARED $vg "$dev1" - vgextend $vg "$dev2" -lvcreate -n $lv1 -l 8 -an $vg "$dev1" - -lvcreate -n $lv2 -l 4 -an $vg "$dev2" +blockdev --getss "$dev1" +blockdev --getpbsz "$dev1" +blockdev --getss "$dev2" +blockdev --getpbsz "$dev2" -# test1: create fs on LV before writecache is attached +# Test attach while inactive, detach while inactive +# create fs on LV before writecache is attached +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" lvchange -ay $vg/$lv1 - -mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg/$lv1" - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -cp pattern1 $mount_dir/pattern1 - -umount $mount_dir +_add_new_data_to_mnt +umount $mnt lvchange -an $vg/$lv1 - lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 - check lv_field $vg/$lv1 segtype writecache - lvs -a $vg/${lv2}_cvol --noheadings -o segtype >out grep linear out - lvchange -ay $vg/$lv1 - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -diff pattern1 $mount_dir/pattern1 - -cp pattern1 $mount_dir/pattern1b - -ls -l $mount_dir - -umount $mount_dir - +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +_add_more_data_to_mnt +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt lvchange -an $vg/$lv1 - lvconvert --splitcache $vg/$lv1 - check lv_field $vg/$lv1 segtype linear check lv_field $vg/$lv2 segtype linear - 
lvchange -ay $vg/$lv1 -lvchange -ay $vg/$lv2 - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -ls -l $mount_dir - -diff pattern1 $mount_dir/pattern1 -diff pattern1 $mount_dir/pattern1b - -umount $mount_dir +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" lvchange -an $vg/$lv1 +_verify_data_on_lv lvchange -an $vg/$lv2 +lvremove $vg/$lv1 +lvremove $vg/$lv2 -# test2: create fs on LV after writecache is attached +# Test attach while inactive, detach while inactive +# create fs on LV after writecache is attached +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 - check lv_field $vg/$lv1 segtype writecache - lvs -a $vg/${lv2}_cvol --noheadings -o segtype >out grep linear out - lvchange -ay $vg/$lv1 - -mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg/$lv1" - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -cp pattern1 $mount_dir/pattern1 -ls -l $mount_dir - -umount $mount_dir +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_new_data_to_mnt +umount $mnt lvchange -an $vg/$lv1 - lvconvert --splitcache $vg/$lv1 - -check lv_field $vg/$lv1 segtype linear -check lv_field $vg/$lv2 segtype linear - lvchange -ay $vg/$lv1 -lvchange -ay $vg/$lv2 - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -ls -l $mount_dir - -diff pattern1 $mount_dir/pattern1 - -umount $mount_dir +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +mount "$DM_DEV_DIR/$vg/$lv1" $mnt +_add_more_data_to_mnt +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt lvchange -an $vg/$lv1 -lvchange -an $vg/$lv2 - - -# test3: attach writecache to an active LV - -lvchange -ay $vg/$lv1 - -mkfs.xfs -f -s size=4096 "$DM_DEV_DIR/$vg/$lv1" - -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -cp pattern1 $mount_dir/pattern1 -ls -l $mount_dir - -# TODO BZ 1808012 - can not convert active volume to writecache: -not lvconvert --yes --type writecache --cachevol 
$lv2 $vg/$lv1 - -if false; then -check lv_field $vg/$lv1 segtype writecache - -lvs -a $vg/${lv2}_cvol --noheadings -o segtype >out -grep linear out - -cp pattern1 $mount_dir/pattern1.after +_verify_data_on_lv +lvremove $vg/$lv1 +lvremove $vg/$lv2 -diff pattern1 $mount_dir/pattern1 -diff pattern1 $mount_dir/pattern1.after +# Test attach while active, detach while active -umount $mount_dir -lvchange -an $vg/$lv1 +lvcreate -n $lv1 -l 8 -an $vg "$dev1" +lvcreate -n $lv2 -l 4 -an $vg "$dev2" lvchange -ay $vg/$lv1 -mount "$DM_DEV_DIR/$vg/$lv1" $mount_dir - -diff pattern1 $mount_dir/pattern1 -diff pattern1 $mount_dir/pattern1.after -fi - -umount $mount_dir +_add_new_data_to_mnt +lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_add_more_data_to_mnt +_verify_data_on_mnt +lvconvert --splitcache $vg/$lv1 +check lv_field $vg/$lv1 segtype linear +check lv_field $vg/$lv2 segtype linear +blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +_verify_data_on_mnt +_verify_more_data_on_mnt +umount $mnt lvchange -an $vg/$lv1 +lvchange -an $vg/$lv2 +_verify_data_on_lv lvremove $vg/$lv1 +lvremove $vg/$lv2 +# FIXME: test depends on unpushed commit +# that enables two stage flush using cleaner +# +# Test attach while active, detach while active, +# skip cleaner so flush message is used instead +# +# lvcreate -n $lv1 -l 8 -an $vg "$dev1" +# lvcreate -n $lv2 -l 4 -an $vg "$dev2" +# lvchange -ay $vg/$lv1 +# _add_new_data_to_mnt +# lvconvert --yes --type writecache --cachevol $lv2 $vg/$lv1 +# blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +# blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +# _add_more_data_to_mnt +# _verify_data_on_mnt +# lvconvert --splitcache --cachesettings cleaner=0 $vg/$lv1 +# check lv_field $vg/$lv1 segtype linear +# check lv_field $vg/$lv2 segtype linear +# blockdev --getss "$DM_DEV_DIR/$vg/$lv1" +# blockdev --getpbsz "$DM_DEV_DIR/$vg/$lv1" +# 
_verify_data_on_mnt +# _verify_more_data_on_mnt +# umount $mnt +# lvchange -an $vg/$lv1 +# lvchange -an $vg/$lv2 +# _verify_data_on_lv +# lvremove $vg/$lv1 +# lvremove $vg/$lv2 + vgremove -ff $vg - + diff --git a/tools/args.h b/tools/args.h index d1f604b..3a7e5d4 100644 --- a/tools/args.h +++ b/tools/args.h @@ -126,6 +126,12 @@ arg(cachepool_ARG, '\0', "cachepool", lv_VAL, 0, 0, arg(cachevol_ARG, '\0', "cachevol", lv_VAL, 0, 0, "The name of a cache volume.\n") +arg(cachedevice_ARG, '\0', "cachedevice", pv_VAL, ARG_GROUPABLE, 0, + "The name of a device to use for a cache.\n") + +arg(cachesize_ARG, '\0', "cachesize", sizemb_VAL, 0, 0, + "The size of cache to use.\n") + arg(commandprofile_ARG, '\0', "commandprofile", string_VAL, 0, 0, "The command profile to use for command configuration.\n" "See \\fBlvm.conf\\fP(5) for more information about profiles.\n") @@ -1428,7 +1434,16 @@ arg(thin_ARG, 'T', "thin", 0, 0, 0, "See \\fBlvmthin\\fP(7) for more information about LVM thin provisioning.\n") arg(updatemetadata_ARG, '\0', "updatemetadata", 0, 0, 0, - "Update VG metadata to correct problems.\n") + "Update VG metadata to correct problems.\n" + "If VG metadata was updated while a PV was missing, and the PV\n" + "reappears with an old version of metadata, then this option\n" + "(or any other command that writes metadata) will update the\n" + "metadata on the previously missing PV. If a PV was removed\n" + "from a VG while it was missing, and the PV reappears, using\n" + "this option will clear the outdated metadata from the previously\n" + "missing PV. If metadata text is damaged on one PV, using this\n" + "option will replace the damaged metadata text. For more severe\n" + "damage, e.g. 
with headers, see \\fBpvck\\fP(8).\n") arg(uuid_ARG, 'u', "uuid", 0, 0, 0, "#pvchange\n" diff --git a/tools/command-lines.in b/tools/command-lines.in index ed3d041..1b0ca22 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -247,7 +247,7 @@ RULE: --profile not --detachprofile RULE: --metadataprofile not --detachprofile RULE: --minrecoveryrate --maxrecoveryrate and LV_raid RULE: --writebehind --writemostly and LV_raid1 -RULE: --cachemode --cachepolicy --cachesettings and LV_cache LV_cachepool +RULE: --cachemode --cachepolicy --cachesettings and LV_cache LV_cachepool LV_writecache RULE: --errorwhenfull --discards --zero and LV_thinpool RULE: --permission not lv_is_external_origin lv_is_raid_metadata lv_is_raid_image LV_thinpool RULE: --alloc --contiguous --metadataprofile --permission --persistent --profile --readahead not lv_is_thick_origin @@ -359,7 +359,8 @@ OP: PV ... ID: lvconvert_raid_types DESC: Convert LV to raid or change raid layout DESC: (a specific raid level must be used, e.g. raid1). -RULE: all not lv_is_locked lv_is_pvmove lv_is_raid_with_integrity +RULE: all not lv_is_locked lv_is_pvmove +RULE: lv_is_raid_with_integrity not --stripes_long --stripesize --regionsize --interval lvconvert --mirrors SNumber LV OO: --regionsize RegionSize, --interval Number, --mirrorlog MirrorLog, OO_LVCONVERT @@ -497,6 +498,20 @@ FLAGS: SECONDARY_SYNTAX --- +lvconvert --type writecache --cachedevice PV LV_linear_striped_raid +OO: OO_LVCONVERT, --cachesize SizeMB, --cachesettings String +ID: lvconvert_to_writecache_with_device +DESC: Add a writecache to an LV, using a specified cache device. +RULE: all and lv_is_visible + +lvconvert --type cache --cachedevice PV LV_linear_striped_raid_thinpool +OO: OO_LVCONVERT, --cachesize SizeMB, --cachesettings String +ID: lvconvert_to_cache_with_device +DESC: Add a cache to an LV, using a specified cache device. 
+RULE: all and lv_is_visible + +--- + lvconvert --type thin-pool LV_linear_striped_raid_cache OO: --stripes_long Number, --stripesize SizeKB, --discards Discards, OO_LVCONVERT_POOL, OO_LVCONVERT @@ -1205,87 +1220,107 @@ lvcreate --type cache --size SizeMB --cachepool LV_cachepool VG OO: --cache, OO_LVCREATE_POOL, OO_LVCREATE_CACHE, OO_LVCREATE, --stripes Number, --stripesize SizeKB OP: PV ... -ID: lvcreate_cache_vol_with_new_origin -DESC: Create a cache LV, first creating a new origin LV, -DESC: then combining it with the existing cache pool named -DESC: by the --cachepool arg. +ID: lvcreate_and_attach_cachepool +DESC: Create a new LV, then attach the specified cachepool +DESC: which converts the new LV to type cache. # alternate form of lvcreate --type cache +# (omits the --type cache option which is inferred) lvcreate --size SizeMB --cachepool LV_cachepool VG OO: --type cache, --cache, OO_LVCREATE_CACHE, OO_LVCREATE, --stripes Number, --stripesize SizeKB OP: PV ... -ID: lvcreate_cache_vol_with_new_origin -DESC: Create a cache LV, first creating a new origin LV, -DESC: then combining it with the existing cache pool named -DESC: by the --cachepool arg (variant, infers --type cache). +ID: lvcreate_and_attach_cachepool_v2 +DESC: Create a new LV, then attach the specified cachepool +DESC: which converts the new LV to type cache +DESC: (variant, infers --type cache.) FLAGS: SECONDARY_SYNTAX # alternate form of lvcreate --type cache +# (moves cachepool from option arg to position arg, +# dropping the normal VG position arg) lvcreate --type cache --size SizeMB LV_cachepool OO: --cache, OO_LVCREATE_POOL, OO_LVCREATE_CACHE, OO_LVCREATE, --stripes Number, --stripesize SizeKB OP: PV ... -ID: lvcreate_cache_vol_with_new_origin -DESC: Create a cache LV, first creating a new origin LV, -DESC: then combining it with the existing cache pool named -DESC: in the first arg (variant, also use --cachepool). 
+ID: lvcreate_and_attach_cachepool_v3 +DESC: Create a new LV, then attach the specified cachepool +DESC: which converts the new LV to type cache. +DESC: (variant, also use --cachepool). FLAGS: SECONDARY_SYNTAX -# This is a ridiculously crazy command which nobody could -# understand. It should be be eliminated. It does two different -# things depending on whether LV in pos 1 is a cachepool LV -# or not. Both variations are unnecessary. -# -# 1. If LV is a cachepool, then it's an alternate form of -# an already complicated command above. -# -# # alternate form for lvcreate_cache_vol_with_new_origin -# lvcreate --cache --size SizeMB LV_cachepool -# OO: --type cache, --cache, OO_LVCREATE_CACHE, OO_LVCREATE, --stripes Number, --stripesize SizeKB -# OP: PV ... -# ID: lvcreate_cache_vol_with_new_origin -# DESC: Create a cache LV, first creating a new origin LV, -# DESC: then combining it with the existing cache pool named -# DESC: in the first arg (variant, infers --type cache, -# DESC: also use --cachepool). -# -# 2. If LV is not a cachepool, then it's a disguised lvconvert. -# -# # FIXME: this should be done by lvconvert, and this command removed -# lvcreate --type cache --size SizeMB LV -# OO: OO_LVCREATE_POOL, OO_LVCREATE_CACHE, OO_LVCREATE -# OP: PV ... -# ID: lvcreate_convert_to_cache_vol_with_cachepool -# DESC: Convert the specified LV to type cache after creating a new -# DESC: cache pool LV to use (use lvconvert). +# This command has two different meanings which ought to +# have separate command defs, but since the syntax is the +# same for both they have to share one command def with +# an ambiguous meaning. Which command is performed depends +# on whether the LV in the first arg position is a +# cachepool or not (we can't have two different command +# defs that differ only in the type of LV in the arg position +# because when parsing commands we don't know the LV type.) +# +# 1. 
An alternate form of lvcreate_and_attach_cachepool_v3 +# this syntax: lvcreate --cache --size SizeMB LV_cachepool +# is alternative for: lvcreate --type cache --size SizeMB LV_cachepool +# +# 2. An alternative to using lvconvert to convert LV to type cache, +# but in this case the cachepool is created internally and +# then attached to the LV arg. # # Note that stripes are accepted by the first and not by the # second, but it's not possible to validate this until after # the LV type is known. -# -# So, to define this syntax we have to combine both of -# those variants, each crazy on it's own, into one -# ridiculous command. -# def1: alternate form of lvcreate --type cache, or -# def2: it should be done by lvconvert. lvcreate --cache --size SizeMB LV OO: OO_LVCREATE_CACHE, OO_LVCREATE_POOL, OO_LVCREATE, --stripes Number, --stripesize SizeKB OP: PV ... -ID: lvcreate_cache_vol_with_new_origin_or_convert_to_cache_vol_with_cachepool -DESC: When LV is a cache pool, create a cache LV, -DESC: first creating a new origin LV, then combining it with -DESC: the existing cache pool named in the first arg -DESC: (variant, infers --type cache, also use --cachepool). -DESC: When LV is not a cache pool, convert the specified LV -DESC: to type cache after creating a new cache pool LV to use -DESC: (use lvconvert). +ID: lvcreate_new_plus_old_cachepool_or_lvconvert_old_plus_new_cachepool +DESC: When the LV arg is a cachepool, then create a new LV and +DESC: attach the cachepool arg to it. +DESC: (variant, use --type cache and --cachepool.) +DESC: When the LV arg is not a cachepool, then create a new cachepool +DESC: and attach it to the LV arg (alternative, use lvconvert.) FLAGS: SECONDARY_SYNTAX --- +# These all create a new origin LV, then forwards to lvconvert +# which combines it with a cachevol (which already exists or +# which needs to be created from cachedevice), converting +# the new LV to type cache or writecache. 
+ +lvcreate --type cache --size SizeMB --cachevol LV VG +OO: OO_LVCREATE, OO_LVCREATE_CACHE, --stripes Number, --stripesize SizeKB +OP: PV ... +ID: lvcreate_and_attach_cachevol_for_cache +DESC: Create a new LV, then attach the specified cachevol +DESC: which converts the new LV to type cache. + +lvcreate --type cache --size SizeMB --cachedevice PV VG +OO: OO_LVCREATE, OO_LVCREATE_CACHE, --cachesize SizeMB, --stripes Number, --stripesize SizeKB +OP: PV ... +ID: lvcreate_and_attach_cachedevice_for_cache +DESC: Create a new LV, then attach a cachevol created from +DESC: the specified cache device, which converts the +DESC: new LV to type cache. + +lvcreate --type writecache --size SizeMB --cachevol LV VG +OO: OO_LVCREATE, --cachesettings String, --stripes Number, --stripesize SizeKB +OP: PV ... +ID: lvcreate_and_attach_cachevol_for_writecache +DESC: Create a new LV, then attach the specified cachevol +DESC: which converts the new LV to type writecache. + +lvcreate --type writecache --size SizeMB --cachedevice PV VG +OO: OO_LVCREATE, --cachesize SizeMB, --cachesettings String, --stripes Number, --stripesize SizeKB +OP: PV ... +ID: lvcreate_and_attach_cachedevice_for_writecache +DESC: Create a new LV, then attach a cachevol created from +DESC: the specified cache device, which converts the +DESC: new LV to type writecache. 
+ +--- + lvdisplay OO: --aligned, --all, --binary, --colon, --columns, --configreport ConfigReport, --foreign, --history, --ignorelockingfailure, diff --git a/tools/command.c b/tools/command.c index 511dda1..2d01849 100644 --- a/tools/command.c +++ b/tools/command.c @@ -1420,6 +1420,9 @@ int define_commands(struct cmd_context *cmdtool, const char *run_name) if (line[0] == '\n') break; + if (!strcmp(line, "---") || !strcmp(line, "--")) + continue; + if ((n = strchr(line, '\n'))) *n = '\0'; diff --git a/tools/lvchange.c b/tools/lvchange.c index 2d5bb32..c0adadf 100644 --- a/tools/lvchange.c +++ b/tools/lvchange.c @@ -606,6 +606,88 @@ static int _lvchange_persistent(struct cmd_context *cmd, return 1; } +static int _lvchange_writecache(struct cmd_context *cmd, + struct logical_volume *lv, + uint32_t *mr) +{ + struct writecache_settings settings = { 0 }; + uint32_t block_size_sectors = 0; + struct lv_segment *seg = first_seg(lv); + int set_count = 0; + + if (!get_writecache_settings(cmd, &settings, &block_size_sectors)) + return_0; + + if (block_size_sectors && (seg->writecache_block_size != (block_size_sectors * 512))) { + log_error("Cannot change existing block size %u bytes.", seg->writecache_block_size); + return 0; + } + + if (settings.high_watermark_set) { + seg->writecache_settings.high_watermark_set = settings.high_watermark_set; + seg->writecache_settings.high_watermark = settings.high_watermark; + set_count++; + } + if (settings.low_watermark_set) { + seg->writecache_settings.low_watermark_set = settings.low_watermark_set; + seg->writecache_settings.low_watermark = settings.low_watermark; + set_count++; + } + if (settings.writeback_jobs_set) { + seg->writecache_settings.writeback_jobs_set = settings.writeback_jobs_set; + seg->writecache_settings.writeback_jobs = settings.writeback_jobs; + set_count++; + } + if (settings.autocommit_blocks_set) { + seg->writecache_settings.autocommit_blocks_set = settings.autocommit_blocks_set; + 
seg->writecache_settings.autocommit_blocks = settings.autocommit_blocks; + set_count++; + } + if (settings.autocommit_time_set) { + seg->writecache_settings.autocommit_time_set = settings.autocommit_time_set; + seg->writecache_settings.autocommit_time = settings.autocommit_time; + set_count++; + } + if (settings.fua_set) { + seg->writecache_settings.fua_set = settings.fua_set; + seg->writecache_settings.fua = settings.fua; + set_count++; + } + if (settings.nofua_set) { + seg->writecache_settings.nofua_set = settings.nofua_set; + seg->writecache_settings.nofua = settings.nofua; + set_count++; + } + if (settings.cleaner_set) { + seg->writecache_settings.cleaner_set = settings.cleaner_set; + seg->writecache_settings.cleaner = settings.cleaner; + set_count++; + } + if (settings.max_age_set) { + seg->writecache_settings.max_age_set = settings.max_age_set; + seg->writecache_settings.max_age = settings.max_age; + set_count++; + } + + if (!set_count) { + /* + * Empty settings can be used to clear all current settings, + * lvchange --cachesettings "" vg/lv + */ + if (!arg_count(cmd, yes_ARG) && + yes_no_prompt("Clear all writecache settings? 
") == 'n') { + log_print("No settings changed."); + return 1; + } + memset(&seg->writecache_settings, 0, sizeof(struct writecache_settings)); + } + + /* Request caller to commit and reload metadata */ + *mr |= MR_RELOAD; + + return 1; +} + static int _lvchange_cache(struct cmd_context *cmd, struct logical_volume *lv, uint32_t *mr) @@ -619,6 +701,9 @@ static int _lvchange_cache(struct cmd_context *cmd, int r = 0, is_clean; uint32_t chunk_size = 0; /* FYI: lvchange does NOT support its change */ + if (lv_is_writecache(lv)) + return _lvchange_writecache(cmd, lv, mr); + seg = first_seg(lv); if (seg_is_cache(seg) && lv_is_cache_vol(seg->pool_lv)) diff --git a/tools/lvconvert.c b/tools/lvconvert.c index 8652252..524ed5a 100644 --- a/tools/lvconvert.c +++ b/tools/lvconvert.c @@ -1319,6 +1319,8 @@ static int _raid4_conversion_supported(struct logical_volume *lv, struct lvconve static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *lp) { int image_count = 0; + int images_reduced = 0; + int type_enforced = 0; struct cmd_context *cmd = lv->vg->cmd; struct lv_segment *seg = first_seg(lv); @@ -1357,6 +1359,8 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l else image_count = lp->mirrors + 1; + images_reduced = (image_count < lv_raid_image_count(lv)); + if (image_count < 1) { log_error("Unable to %s images by specified amount.", lp->keep_mimages ? 
"split" : "reduce"); @@ -1369,6 +1373,12 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l display_lvname(lv)); return 0; } + + if (!*lp->type_str) { + lp->type_str = SEG_TYPE_NAME_RAID1; + lp->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1); + type_enforced = 1; + } } if ((lp->corelog || lp->mirrorlog) && strcmp(lp->type_str, SEG_TYPE_NAME_MIRROR)) { @@ -1383,7 +1393,7 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l return lv_raid_split(lv, lp->yes, lp->lv_split_name, image_count, lp->pvh); if (lp->mirrors_supplied) { - if ((seg_is_striped(seg) && seg->area_count == 1) || seg_is_raid1(seg)) { /* ??? */ + if (seg_is_linear(seg) || seg_is_raid1(seg)) { /* ??? */ if (!*lp->type_str || !strcmp(lp->type_str, SEG_TYPE_NAME_RAID1) || !strcmp(lp->type_str, SEG_TYPE_NAME_LINEAR) || (!strcmp(lp->type_str, SEG_TYPE_NAME_STRIPED) && image_count == 1)) { if (image_count > DEFAULT_RAID1_MAX_IMAGES) { @@ -1400,7 +1410,7 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l lp->region_size : seg->region_size , lp->pvh)) return_0; - if (lv_raid_has_integrity(lv)) { + if (lv_raid_has_integrity(lv) && !images_reduced) { struct integrity_settings *isettings = NULL; if (!lv_get_raid_integrity_settings(lv, &isettings)) return_0; @@ -1446,7 +1456,7 @@ static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *l /* FIXME This needs changing globally. */ if (!arg_is_set(cmd, stripes_long_ARG)) lp->stripes = 0; - if (!arg_is_set(cmd, type_ARG)) + if (!type_enforced && !arg_is_set(cmd, type_ARG)) lp->segtype = NULL; if (!arg_is_set(cmd, regionsize_ARG)) lp->region_size = 0; @@ -1474,7 +1484,7 @@ try_new_takeover_or_reshape: /* FIXME This needs changing globally. 
*/ if (!arg_is_set(cmd, stripes_long_ARG)) lp->stripes = 0; - if (!arg_is_set(cmd, type_ARG)) + if (!type_enforced && !arg_is_set(cmd, type_ARG)) lp->segtype = NULL; if (!lv_raid_convert(lv, lp->segtype, @@ -3276,7 +3286,11 @@ static int _lvconvert_to_pool(struct cmd_context *cmd, } metadata_lv->status &= ~LV_ACTIVATION_SKIP; - if (!wipe_lv(metadata_lv, (struct wipe_params) { .do_zero = 1 })) { + if (!wipe_lv(metadata_lv, (struct wipe_params) { + .do_wipe_signatures = 1, + .is_metadata = 1, + .yes = arg_count(cmd, yes_ARG), + .force = arg_count(cmd, force_ARG) } )) { log_error("Aborting. Failed to wipe metadata lv."); goto bad; } @@ -4245,51 +4259,205 @@ int lvconvert_to_pool_cmd(struct cmd_context *cmd, int argc, char **argv) NULL, NULL, &_lvconvert_to_pool_single); } -static int _lvconvert_cachevol_attach_single(struct cmd_context *cmd, - struct logical_volume *lv, - struct processing_handle *handle) +#define MAX_CACHEDEVS 8 + +static int _lv_create_cachevol(struct cmd_context *cmd, + struct volume_group *vg, + struct logical_volume *lv, + struct logical_volume **cachevol_lv) { - struct volume_group *vg = lv->vg; - struct logical_volume *cachevol_lv; - const char *cachevol_name; + char cvname[NAME_LEN]; + char format[NAME_LEN]; + struct dm_list *use_pvh; + struct pv_list *pvl; + char *dev_name; + struct device *dev_fast; + char *dev_argv[MAX_CACHEDEVS]; + int dev_argc = 0; + uint64_t cache_size_sectors = 0; + uint64_t full_size_sectors = 0; + uint64_t pv_size_sectors; + struct logical_volume *cachevol; + struct arg_value_group_list *group; + struct lvcreate_params lp = { + .activate = CHANGE_AN, + .alloc = ALLOC_INHERIT, + .major = -1, + .minor = -1, + .permission = LVM_READ | LVM_WRITE, + .pvh = &vg->pvs, + .read_ahead = DM_READ_AHEAD_NONE, + .stripes = 1, + .vg_name = vg->name, + .zero = 0, + .wipe_signatures = 0, + .suppress_zero_warn = 1, + }; - if (!(cachevol_name = arg_str_value(cmd, cachevol_ARG, NULL))) - goto_out; + /* + * If cache size is not set, and 
all cachedevice's are unused, + * then the cache size is the sum of all cachedevice sizes. + */ + cache_size_sectors = arg_uint64_value(cmd, cachesize_ARG, 0); - if (!validate_lvname_param(cmd, &vg->name, &cachevol_name)) - goto_out; + dm_list_iterate_items(group, &cmd->arg_value_groups) { + if (!grouped_arg_is_set(group->arg_values, cachedevice_ARG)) + continue; - if (!(cachevol_lv = find_lv(vg, cachevol_name))) { - log_error("Cache single %s not found.", cachevol_name); - goto out; + if (!(dev_name = (char *)grouped_arg_str_value(group->arg_values, cachedevice_ARG, NULL))) + break; + + if (dev_name[0] == '@') { + if (!cache_size_sectors) { + log_error("With tag as cachedevice, --cachesize is required."); + return 0; + } + goto add_dev_arg; + } + + if (!(dev_fast = dev_cache_get(cmd, dev_name, cmd->filter))) { + log_error("Device %s not found.", dev_name); + return 0; + } + + if (!(pvl = find_pv_in_vg(vg, dev_name))) { + log_error("PV %s not found in VG.", dev_name); + return 0; + } + + /* + * If the dev is used in the VG, then require a cachesize to allocate + * from it. If it is not used in the VG, then prompt asking if the + * entire dev should be used. + */ + if (!cache_size_sectors && pvl->pv->pe_alloc_count) { + log_error("PV %s is in use, --cachesize is required.", dev_name); + return 0; + } + + if (!cache_size_sectors) { + pv_size_sectors = (pvl->pv->pe_count * vg->extent_size); + + if (!arg_is_set(cmd, yes_ARG) && + yes_no_prompt("Use all %s from %s for cache? 
[y/n]: ", + display_size(cmd, pv_size_sectors), dev_name) == 'n') { + log_print("Use --cachesize SizeMB to use a part of the cachedevice."); + log_error("Conversion aborted."); + return 0; + } + full_size_sectors += pv_size_sectors; + } + add_dev_arg: + if (dev_argc >= MAX_CACHEDEVS) { + log_error("Cannot allocate from more than %u cache devices.", MAX_CACHEDEVS); + return 0; + } + + dev_argv[dev_argc++] = dev_name; } - if (lv_is_cache_vol(cachevol_lv)) { - log_error("LV %s is already used as a cachevol.", display_lvname(cachevol_lv)); - goto out; + if (!cache_size_sectors) + cache_size_sectors = full_size_sectors; + + if (!dev_argc) { + log_error("No cachedevice specified to create a cachevol."); + return 0; } - /* Ensure the LV is not active elsewhere. */ - if (!lockd_lv(cmd, lv, "ex", 0)) - goto_out; + if (!(use_pvh = create_pv_list(cmd->mem, vg, dev_argc, dev_argv, 1))) { + log_error("cachedevice not found in VG %s.", dev_name); + return 0; + } - if (!dm_list_empty(&cachevol_lv->segs_using_this_lv)) { - log_error("LV %s is already in use.", display_lvname(cachevol_lv)); - goto out; + if (dm_snprintf(cvname, NAME_LEN, "%s_cache", lv->name) < 0) { + log_error("Failed to create cachevol LV name."); + return 0; } - if (!arg_is_set(cmd, yes_ARG) && - yes_no_prompt("Erase all existing data on %s? 
[y/n]: ", display_lvname(cachevol_lv)) == 'n') { - log_error("Conversion aborted."); - goto out; + if (find_lv(vg, cvname)) { + memset(format, 0, sizeof(cvname)); + memset(cvname, 0, sizeof(cvname)); + if (dm_snprintf(format, sizeof(format), "%s_cache%%d", lv->name) < 0) { + log_error("Failed to generate cachevol LV format."); + return 0; + } + if (!generate_lv_name(vg, format, cvname, sizeof(cvname))) { + log_error("Failed to generate cachevol LV name."); + return 0; + } + } + + lp.lv_name = cvname; + lp.pvh = use_pvh; + lp.extents = cache_size_sectors / vg->extent_size; + + log_print("Creating cachevol LV %s with size %s.", + cvname, display_size(cmd, cache_size_sectors)); + + dm_list_init(&lp.tags); + + if (!(lp.segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_STRIPED))) + return_0; + + if (!(cachevol = lv_create_single(vg, &lp))) { + log_error("Failed to create cachevol LV"); + return 0; + } + + *cachevol_lv = cachevol; + return 1; +} + +int lvconvert_cachevol_attach_single(struct cmd_context *cmd, + struct logical_volume *lv, + struct processing_handle *handle) +{ + struct volume_group *vg = lv->vg; + struct logical_volume *lv_fast; + const char *fast_name; + + /* + * User specifies an existing cachevol to use or a cachedevice + * to create a cachevol from. + */ + if ((fast_name = arg_str_value(cmd, cachevol_ARG, NULL))) { + if (!validate_lvname_param(cmd, &vg->name, &fast_name)) + goto_bad; + + if (!(lv_fast = find_lv(vg, fast_name))) { + log_error("LV %s not found.", fast_name); + goto bad; + } + + if (lv_is_cache_vol(lv_fast)) { + log_error("LV %s is already used as a cachevol.", display_lvname(lv_fast)); + goto bad; + } + + if (!dm_list_empty(&lv_fast->segs_using_this_lv)) { + log_error("LV %s is already in use.", display_lvname(lv_fast)); + goto bad; + } + + if (!arg_is_set(cmd, yes_ARG) && + yes_no_prompt("Erase all existing data on %s? 
[y/n]: ", display_lvname(lv_fast)) == 'n') { + log_error("Conversion aborted."); + goto bad; + } + + if (!lockd_lv(cmd, lv_fast, "ex", 0)) + goto_bad; + } else { + if (!_lv_create_cachevol(cmd, vg, lv, &lv_fast)) + goto_bad; } /* Ensure the LV is not active elsewhere. */ - if (!lockd_lv(cmd, cachevol_lv, "ex", LDLV_PERSISTENT)) - goto_out; + if (!lockd_lv(cmd, lv, "ex", 0)) + goto_bad; - if (!wipe_cache_pool(cachevol_lv)) - goto_out; + if (!wipe_cache_pool(lv_fast)) + goto_bad; /* When the lv arg is a thinpool, redirect command to data sub lv. */ @@ -4299,17 +4467,17 @@ static int _lvconvert_cachevol_attach_single(struct cmd_context *cmd, } if (_raid_split_image_conversion(lv)) - goto_out; + goto_bad; /* Attach the cache to the main LV. */ - if (!_cache_vol_attach(cmd, lv, cachevol_lv)) - goto_out; + if (!_cache_vol_attach(cmd, lv, lv_fast)) + goto_bad; log_print_unless_silent("Logical volume %s is now cached.", display_lvname(lv)); return ECMD_PROCESSED; - out: + bad: return ECMD_FAILED; } @@ -5308,19 +5476,8 @@ static int _lvconvert_detach_writecache(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume *lv_fast) { - char cvol_name[NAME_LEN]; - char *c; int noflush = 0; - /* - * LV must be inactive externally before detaching cache. - */ - - if (lv_info(cmd, lv, 1, NULL, 0, 0)) { - log_error("LV %s must be inactive to detach writecache.", display_lvname(lv)); - return 0; - } - if (!archive(lv->vg)) return_0; @@ -5344,36 +5501,23 @@ static int _lvconvert_detach_writecache(struct cmd_context *cmd, noflush = 1; } - if (!lv_detach_writecache_cachevol(lv, noflush)) - return_0; - /* - * Rename lv_fast back to its original name, without the _cvol - * suffix that was added when lv_fast was attached for caching. + * TODO: send a message to writecache in the kernel to start writing + * back cache data to the origin. Then release the vg lock and monitor + * the progress of that writeback. 
When it's complete we can reacquire + * the vg lock, rescan the vg (ensure it hasn't changed), and do the + * detach which should be quick since the writeback is complete. If + * this command is canceled while monitoring writeback, it should just + * be rerun. The LV will continue to have the writecache until this + * command is run to completion. */ - if (!dm_strncpy(cvol_name, lv_fast->name, sizeof(cvol_name)) || - !(c = strstr(cvol_name, "_cvol"))) { - log_debug("LV %s has no suffix for cachevol (skipping rename).", - display_lvname(lv_fast)); - } else { - *c = 0; - /* If the name is in use, generate new lvol%d */ - if (lv_name_is_used_in_vg(lv->vg, cvol_name, NULL) && - !generate_lv_name(lv->vg, "lvol%d", cvol_name, sizeof(cvol_name))) { - log_error("Failed to generate unique name for unused logical volume."); - return 0; - } - if (!lv_rename_update(cmd, lv_fast, cvol_name, 0)) - return_0; - } - - if (!vg_write(lv->vg) || !vg_commit(lv->vg)) + if (!lv_detach_writecache_cachevol(lv, noflush)) return_0; backup(lv->vg); - log_print_unless_silent("Logical volume %s write cache has been detached.", + log_print_unless_silent("Logical volume %s writecache has been detached.", display_lvname(lv)); return 1; } @@ -5383,7 +5527,8 @@ static int _writecache_zero(struct cmd_context *cmd, struct logical_volume *lv) struct wipe_params wp = { .do_wipe_signatures = 1, /* optional, to print warning if clobbering something */ .do_zero = 1, /* required for dm-writecache to work */ - .zero_sectors = 1 + .yes = arg_count(cmd, yes_ARG), + .force = arg_count(cmd, force_ARG) }; int ret; @@ -5400,7 +5545,8 @@ static int _writecache_zero(struct cmd_context *cmd, struct logical_volume *lv) return 0; } - ret = wipe_lv(lv, wp); + if (!(ret = wipe_lv(lv, wp))) + stack; if (!deactivate_lv(cmd, lv)) { log_error("Failed to deactivate LV %s for zeroing.", display_lvname(lv)); @@ -5410,157 +5556,6 @@ static int _writecache_zero(struct cmd_context *cmd, struct logical_volume *lv) return ret; } 
-static int _get_one_writecache_setting(struct cmd_context *cmd, struct writecache_settings *settings, - char *key, char *val, uint32_t *block_size_sectors) -{ - /* special case: block_size is not a setting but is set with the --cachesettings option */ - if (!strncmp(key, "block_size", strlen("block_size"))) { - uint32_t block_size = 0; - if (sscanf(val, "%u", &block_size) != 1) - goto_bad; - if (block_size == 512) - *block_size_sectors = 1; - else if (block_size == 4096) - *block_size_sectors = 8; - else - goto_bad; - return 1; - } - - if (!strncmp(key, "high_watermark", strlen("high_watermark"))) { - if (sscanf(val, "%llu", (unsigned long long *)&settings->high_watermark) != 1) - goto_bad; - if (settings->high_watermark > 100) - goto_bad; - settings->high_watermark_set = 1; - return 1; - } - - if (!strncmp(key, "low_watermark", strlen("low_watermark"))) { - if (sscanf(val, "%llu", (unsigned long long *)&settings->low_watermark) != 1) - goto_bad; - if (settings->low_watermark > 100) - goto_bad; - settings->low_watermark_set = 1; - return 1; - } - - if (!strncmp(key, "writeback_jobs", strlen("writeback_jobs"))) { - if (sscanf(val, "%llu", (unsigned long long *)&settings->writeback_jobs) != 1) - goto_bad; - settings->writeback_jobs_set = 1; - return 1; - } - - if (!strncmp(key, "autocommit_blocks", strlen("autocommit_blocks"))) { - if (sscanf(val, "%llu", (unsigned long long *)&settings->autocommit_blocks) != 1) - goto_bad; - settings->autocommit_blocks_set = 1; - return 1; - } - - if (!strncmp(key, "autocommit_time", strlen("autocommit_time"))) { - if (sscanf(val, "%llu", (unsigned long long *)&settings->autocommit_time) != 1) - goto_bad; - settings->autocommit_time_set = 1; - return 1; - } - - if (!strncmp(key, "fua", strlen("fua"))) { - if (settings->nofua_set) { - log_error("Setting fua and nofua cannot both be set."); - return 0; - } - if (sscanf(val, "%u", &settings->fua) != 1) - goto_bad; - settings->fua_set = 1; - return 1; - } - - if (!strncmp(key, "nofua", 
strlen("nofua"))) { - if (settings->fua_set) { - log_error("Setting fua and nofua cannot both be set."); - return 0; - } - if (sscanf(val, "%u", &settings->nofua) != 1) - goto_bad; - settings->nofua_set = 1; - return 1; - } - - if (settings->new_key) { - log_error("Setting %s is not recognized. Only one unrecognized setting is allowed.", key); - return 0; - } - - log_warn("Unrecognized writecache setting \"%s\" may cause activation failure.", key); - if (yes_no_prompt("Use unrecognized writecache setting? [y/n]: ") == 'n') { - log_error("Aborting writecache conversion."); - return 0; - } - - log_warn("Using unrecognized writecache setting: %s = %s.", key, val); - - settings->new_key = dm_pool_strdup(cmd->mem, key); - settings->new_val = dm_pool_strdup(cmd->mem, val); - return 1; - - bad: - log_error("Invalid setting: %s", key); - return 0; -} - -static int _get_writecache_settings(struct cmd_context *cmd, struct writecache_settings *settings, - uint32_t *block_size_sectors) -{ - struct arg_value_group_list *group; - const char *str; - char key[64]; - char val[64]; - int num; - int pos; - - /* - * "grouped" means that multiple --cachesettings options can be used. - * Each option is also allowed to contain multiple key = val pairs. 
- */ - - dm_list_iterate_items(group, &cmd->arg_value_groups) { - if (!grouped_arg_is_set(group->arg_values, cachesettings_ARG)) - continue; - - if (!(str = grouped_arg_str_value(group->arg_values, cachesettings_ARG, NULL))) - break; - - pos = 0; - - while (pos < strlen(str)) { - /* scan for "key1=val1 key2 = val2 key3= val3" */ - - memset(key, 0, sizeof(key)); - memset(val, 0, sizeof(val)); - - if (sscanf(str + pos, " %63[^=]=%63s %n", key, val, &num) != 2) { - log_error("Invalid setting at: %s", str+pos); - return 0; - } - - pos += num; - - if (!_get_one_writecache_setting(cmd, settings, key, val, block_size_sectors)) - return_0; - } - } - - if (settings->high_watermark_set && settings->low_watermark_set && - (settings->high_watermark <= settings->low_watermark)) { - log_error("High watermark must be greater than low watermark."); - return 0; - } - - return 1; -} - static struct logical_volume *_lv_writecache_create(struct cmd_context *cmd, struct logical_volume *lv, struct logical_volume *lv_fast, @@ -5605,9 +5600,177 @@ static struct logical_volume *_lv_writecache_create(struct cmd_context *cmd, return lv_wcorig; } -#define DEFAULT_WRITECACHE_BLOCK_SIZE_SECTORS 8 /* 4K */ +/* + * Currently only supports writecache block sizes 512 and 4096. + * This could be expanded later. + */ +static int _set_writecache_block_size(struct cmd_context *cmd, + struct logical_volume *lv, + uint32_t *block_size_sectors) +{ + char pathname[PATH_MAX]; + struct device *fs_dev; + struct dm_list pvs; + struct pv_list *pvl; + uint32_t fs_block_size = 0; + uint32_t block_size_setting = 0; + uint32_t block_size = 0; + int lbs_unknown = 0, lbs_4k = 0, lbs_512 = 0; + int pbs_unknown = 0, pbs_4k = 0, pbs_512 = 0; + int rv; + + /* This is set if the user specified a writecache block size on the command line. 
*/ + if (*block_size_sectors) + block_size_setting = *block_size_sectors * 512; + + dm_list_init(&pvs); + + if (!get_pv_list_for_lv(cmd->mem, lv, &pvs)) { + log_error("Failed to build list of PVs for %s.", display_lvname(lv)); + goto_bad; + } + + dm_list_iterate_items(pvl, &pvs) { + unsigned int pbs = 0; + unsigned int lbs = 0; -static int _lvconvert_writecache_attach_single(struct cmd_context *cmd, + if (!dev_get_direct_block_sizes(pvl->pv->dev, &pbs, &lbs)) { + lbs_unknown++; + pbs_unknown++; + continue; + } + + if (lbs == 4096) + lbs_4k++; + else if (lbs == 512) + lbs_512++; + else + lbs_unknown++; + + if (pbs == 4096) + pbs_4k++; + else if (pbs == 512) + pbs_512++; + else + pbs_unknown++; + } + + if (lbs_4k && lbs_512) { + log_error("Writecache requires consistent logical block size for LV devices."); + goto_bad; + } + + if (lbs_4k && block_size_setting && (block_size_setting < 4096)) { + log_error("Writecache block size %u not allowed with device logical block size 4096.", + block_size_setting); + goto_bad; + } + + if (dm_snprintf(pathname, sizeof(pathname), "%s/%s/%s", cmd->dev_dir, + lv->vg->name, lv->name) < 0) { + log_error("Path name too long to get LV block size %s", display_lvname(lv)); + goto_bad; + } + + if (!sync_local_dev_names(cmd)) + stack; + + if (!(fs_dev = dev_cache_get(cmd, pathname, NULL))) { + if (test_mode()) { + log_print("Test mode skips checking fs block size."); + fs_block_size = 0; + goto skip_fs; + } + log_error("Device for LV not found to check block size %s", pathname); + goto_bad; + } + + /* + * get_fs_block_size() returns the libblkid BLOCK_SIZE value, + * where libblkid has fs-specific code to set BLOCK_SIZE to the + * value we need here. + * + * The term "block size" here may not equate directly to what the fs + * calls the block size, e.g. 
xfs calls this the sector size (and + * something different the block size); while ext4 does call this + * value the block size, but it's possible values are not the same + * as xfs's, and do not seem to relate directly to the device LBS. + * + * With 512 LBS and 4K PBS, mkfs.xfs will use xfs sector size 4K. + */ + rv = get_fs_block_size(fs_dev, &fs_block_size); +skip_fs: + if (!rv || !fs_block_size) { + if (lbs_4k && pbs_4k && !pbs_512) { + block_size = 4096; + } else if (lbs_512 && pbs_512 && !pbs_4k) { + block_size = 512; + } else if (lbs_512 && pbs_4k) { + if (block_size_setting == 4096) + block_size = 4096; + else + block_size = 512; + } else { + block_size = 512; + } + + if (block_size_setting && (block_size_setting != block_size)) { + log_error("Cannot use writecache block size %u with unknown file system block size, logical block size %u, physical block size %u.", + block_size_setting, lbs_4k ? 4096 : 512, pbs_4k ? 4096 : 512); + goto bad; + } + + if (block_size != 512) { + log_warn("WARNING: unable to detect a file system block size on %s", display_lvname(lv)); + log_warn("WARNING: using a writecache block size larger than the file system block size may corrupt the file system."); + if (!arg_is_set(cmd, yes_ARG) && + yes_no_prompt("Use writecache block size %u? [y/n]: ", block_size) == 'n') { + log_error("Conversion aborted."); + goto bad; + } + } + + log_print("Using writecache block size %u for unknown file system block size, logical block size %u, physical block size %u.", + block_size, lbs_4k ? 4096 : 512, pbs_4k ? 4096 : 512); + goto out; + } + + if (!block_size_setting) { + /* User did not specify a block size, so choose according to fs block size. 
*/ + if (fs_block_size == 4096) + block_size = 4096; + else if (fs_block_size == 512) + block_size = 512; + else if (fs_block_size > 4096) + block_size = 4096; + else if (fs_block_size < 4096) + block_size = 512; + else + goto_bad; + } else { + if (block_size_setting <= fs_block_size) + block_size = block_size_setting; + else { + log_error("Writecache block size %u cannot be larger than file system block size %u.", + block_size_setting, fs_block_size); + goto_bad; + } + } + +out: + if (block_size == 512) + *block_size_sectors = 1; + else if (block_size == 4096) + *block_size_sectors = 8; + else + goto_bad; + + return 1; +bad: + return 0; +} + +int lvconvert_writecache_attach_single(struct cmd_context *cmd, struct logical_volume *lv, struct processing_handle *handle) { @@ -5616,68 +5779,91 @@ static int _lvconvert_writecache_attach_single(struct cmd_context *cmd, struct logical_volume *lv_fast; struct writecache_settings settings; const char *fast_name; - uint32_t block_size_sectors; + uint32_t block_size_sectors = 0; char *lockd_fast_args = NULL; char *lockd_fast_name = NULL; struct id lockd_fast_id; char cvol_name[NAME_LEN]; + int is_active; - fast_name = arg_str_value(cmd, cachevol_ARG, ""); + /* + * User specifies an existing cachevol to use or a cachedevice + * to create a cachevol from. 
+ */ + if ((fast_name = arg_str_value(cmd, cachevol_ARG, NULL))) { + if (!validate_lvname_param(cmd, &vg->name, &fast_name)) + goto_bad; - if (!(lv_fast = find_lv(vg, fast_name))) { - log_error("LV %s not found.", fast_name); - goto bad; - } + if (!(lv_fast = find_lv(vg, fast_name))) { + log_error("LV %s not found.", fast_name); + goto bad; + } - if (lv_fast == lv) { - log_error("Invalid cachevol LV."); - goto bad; - } + if (lv_fast == lv) { + log_error("Invalid cachevol LV."); + goto bad; + } - if (!seg_is_linear(first_seg(lv_fast))) { - log_error("LV %s must be linear to use as a writecache.", display_lvname(lv_fast)); - goto bad; - } + if (lv_is_cache_vol(lv_fast)) { + log_error("LV %s is already used as a cachevol.", display_lvname(lv_fast)); + goto bad; + } - if (lv_is_cache_vol(lv_fast)) { - log_error("LV %s is already used as a cachevol.", display_lvname(lv_fast)); - goto bad; - } + if (!seg_is_linear(first_seg(lv_fast))) { + log_error("LV %s must be linear to use as a writecache.", display_lvname(lv_fast)); + goto bad; + } - /* - * To permit this we need to check the block size of the fs using lv - * (recently in libblkid) so that we can use a matching writecache - * block size. We also want to do that if the lv is inactive. - */ - if (lv_is_active(lv)) { - log_error("LV %s must be inactive to attach writecache.", display_lvname(lv)); - goto bad; - } + /* fast LV shouldn't generally be active by itself, but just in case. */ + if (lv_is_active(lv_fast)) { + log_error("LV %s must be inactive to attach.", display_lvname(lv_fast)); + goto bad; + } - /* fast LV shouldn't generally be active by itself, but just in case. */ - if (lv_info(cmd, lv_fast, 1, NULL, 0, 0)) { - log_error("LV %s must be inactive to attach.", display_lvname(lv_fast)); - goto bad; + if (!arg_is_set(cmd, yes_ARG) && + yes_no_prompt("Erase all existing data on %s? 
[y/n]: ", display_lvname(lv_fast)) == 'n') { + log_error("Conversion aborted."); + goto bad; + } + } else { + if (!_lv_create_cachevol(cmd, vg, lv, &lv_fast)) + goto_bad; } + is_active = lv_is_active(lv); + memset(&settings, 0, sizeof(settings)); - block_size_sectors = DEFAULT_WRITECACHE_BLOCK_SIZE_SECTORS; - if (!_get_writecache_settings(cmd, &settings, &block_size_sectors)) { + if (!get_writecache_settings(cmd, &settings, &block_size_sectors)) { log_error("Invalid writecache settings."); goto bad; } - if (!arg_is_set(cmd, yes_ARG) && - yes_no_prompt("Erase all existing data on %s? [y/n]: ", display_lvname(lv_fast)) == 'n') { - log_error("Conversion aborted."); - goto bad; + if (!is_active) { + /* checking block size of fs on the lv requires the lv to be active */ + if (!activate_lv(cmd, lv)) { + log_error("Failed to activate LV to check block size %s", display_lvname(lv)); + goto bad; + } + } + + if (!_set_writecache_block_size(cmd, lv, &block_size_sectors)) { + if (!is_active && !deactivate_lv(cmd, lv)) + stack; + goto_bad; } - /* Ensure the two LVs are not active elsewhere. */ + if (!is_active) { + if (!deactivate_lv(cmd, lv)) { + log_error("Failed to deactivate LV after checking block size %s", display_lvname(lv)); + goto bad; + } + } + + /* Ensure the LV is not active elsewhere. 
*/ if (!lockd_lv(cmd, lv, "ex", 0)) goto_bad; - if (!lockd_lv(cmd, lv_fast, "ex", 0)) + if (fast_name && !lockd_lv(cmd, lv_fast, "ex", 0)) goto_bad; if (!archive(vg)) @@ -5744,7 +5930,7 @@ static int _lvconvert_writecache_attach_single(struct cmd_context *cmd, log_error("Failed to unlock fast LV %s/%s", vg->name, lockd_fast_name); } - log_print_unless_silent("Logical volume %s now has write cache.", + log_print_unless_silent("Logical volume %s now has writecache.", display_lvname(lv)); return ECMD_PROCESSED; bad: @@ -5768,7 +5954,7 @@ int lvconvert_to_writecache_cmd(struct cmd_context *cmd, int argc, char **argv) cmd->cname->flags &= ~GET_VGNAME_FROM_OPTIONS; ret = process_each_lv(cmd, cmd->position_argc, cmd->position_argv, NULL, NULL, READ_FOR_UPDATE, handle, NULL, - &_lvconvert_writecache_attach_single); + &lvconvert_writecache_attach_single); destroy_processing_handle(cmd, handle); @@ -5791,7 +5977,7 @@ int lvconvert_to_cache_with_cachevol_cmd(struct cmd_context *cmd, int argc, char cmd->cname->flags &= ~GET_VGNAME_FROM_OPTIONS; ret = process_each_lv(cmd, cmd->position_argc, cmd->position_argv, NULL, NULL, READ_FOR_UPDATE, handle, NULL, - &_lvconvert_cachevol_attach_single); + &lvconvert_cachevol_attach_single); destroy_processing_handle(cmd, handle); diff --git a/tools/lvcreate.c b/tools/lvcreate.c index 5c978b3..3357a08 100644 --- a/tools/lvcreate.c +++ b/tools/lvcreate.c @@ -766,7 +766,9 @@ static int _lvcreate_params(struct cmd_context *cmd, * * Ordering of following type tests is IMPORTANT */ - if ((segtype_str = arg_str_value(cmd, type_ARG, NULL))) { + if (lp->ignore_type) { + segtype_str = SEG_TYPE_NAME_STRIPED; + } else if ((segtype_str = arg_str_value(cmd, type_ARG, NULL))) { lp->type = 1; if (!strcmp(segtype_str, "linear")) { segtype_str = "striped"; @@ -1799,3 +1801,152 @@ int lvcreate(struct cmd_context *cmd, int argc, char **argv) destroy_processing_handle(cmd, handle); return ret; } + +static int _lvcreate_and_attach_writecache_single(struct 
cmd_context *cmd, + const char *vg_name, struct volume_group *vg, struct processing_handle *handle) +{ + struct processing_params *pp = (struct processing_params *) handle->custom_handle; + struct lvcreate_params *lp = pp->lp; + struct logical_volume *lv; + int ret; + + ret = _lvcreate_single(cmd, vg_name, vg, handle); + + if (ret == ECMD_FAILED) + return ret; + + if (!(lv = find_lv(vg, lp->lv_name))) { + log_error("Failed to find LV %s to add writecache.", lp->lv_name); + return ECMD_FAILED; + } + + ret = lvconvert_writecache_attach_single(cmd, lv, handle); + + if (ret == ECMD_FAILED) { + log_error("Removing new LV after failing to add writecache."); + if (!deactivate_lv(cmd, lv)) + log_error("Failed to deactivate new LV %s.", display_lvname(lv)); + if (!lv_remove_with_dependencies(cmd, lv, 1, 0)) + log_error("Failed to remove new LV %s.", display_lvname(lv)); + return ECMD_FAILED; + } + + return ECMD_PROCESSED; +} + +int lvcreate_and_attach_writecache_cmd(struct cmd_context *cmd, int argc, char **argv) +{ + struct processing_handle *handle = NULL; + struct processing_params pp; + struct lvcreate_params lp = { + .major = -1, + .minor = -1, + }; + struct lvcreate_cmdline_params lcp = { 0 }; + int ret; + + /* + * Tell lvcreate to ignore --type since we are using lvcreate + * to create a linear LV and using lvconvert to add cache. + * (Would be better if lvcreate code was split up so we could + * call a specific function that just created a linear/striped LV.) 
+ */ + lp.ignore_type = 1; + + if (!_lvcreate_params(cmd, argc, argv, &lp, &lcp)) { + stack; + return EINVALID_CMD_LINE; + } + + pp.lp = &lp; + pp.lcp = &lcp; + + if (!(handle = init_processing_handle(cmd, NULL))) { + log_error("Failed to initialize processing handle."); + return ECMD_FAILED; + } + + handle->custom_handle = &pp; + + ret = process_each_vg(cmd, 0, NULL, lp.vg_name, NULL, READ_FOR_UPDATE, 0, handle, + &_lvcreate_and_attach_writecache_single); + + _destroy_lvcreate_params(&lp); + destroy_processing_handle(cmd, handle); + return ret; +} + +static int _lvcreate_and_attach_cache_single(struct cmd_context *cmd, + const char *vg_name, struct volume_group *vg, struct processing_handle *handle) +{ + struct processing_params *pp = (struct processing_params *) handle->custom_handle; + struct lvcreate_params *lp = pp->lp; + struct logical_volume *lv; + int ret; + + ret = _lvcreate_single(cmd, vg_name, vg, handle); + + if (ret == ECMD_FAILED) + return ret; + + if (!(lv = find_lv(vg, lp->lv_name))) { + log_error("Failed to find LV %s to add cache.", lp->lv_name); + return ECMD_FAILED; + } + + ret = lvconvert_cachevol_attach_single(cmd, lv, handle); + + if (ret == ECMD_FAILED) { + log_error("Removing new LV after failing to add cache."); + if (!deactivate_lv(cmd, lv)) + log_error("Failed to deactivate new LV %s.", display_lvname(lv)); + if (!lv_remove_with_dependencies(cmd, lv, 1, 0)) + log_error("Failed to remove new LV %s.", display_lvname(lv)); + return ECMD_FAILED; + } + + return ECMD_PROCESSED; +} + +int lvcreate_and_attach_cache_cmd(struct cmd_context *cmd, int argc, char **argv) +{ + struct processing_handle *handle = NULL; + struct processing_params pp; + struct lvcreate_params lp = { + .major = -1, + .minor = -1, + }; + struct lvcreate_cmdline_params lcp = { 0 }; + int ret; + + /* + * Tell lvcreate to ignore --type since we are using lvcreate + * to create a linear LV and using lvconvert to add cache. 
+ * (Would be better if lvcreate code was split up so we could + * call a specific function that just created a linear/striped LV.) + */ + lp.ignore_type = 1; + + if (!_lvcreate_params(cmd, argc, argv, &lp, &lcp)) { + stack; + return EINVALID_CMD_LINE; + } + + pp.lp = &lp; + pp.lcp = &lcp; + + if (!(handle = init_processing_handle(cmd, NULL))) { + log_error("Failed to initialize processing handle."); + return ECMD_FAILED; + } + + handle->custom_handle = &pp; + + ret = process_each_vg(cmd, 0, NULL, lp.vg_name, NULL, READ_FOR_UPDATE, 0, handle, + &_lvcreate_and_attach_cache_single); + + _destroy_lvcreate_params(&lp); + destroy_processing_handle(cmd, handle); + return ret; +} + diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index d87a8f0..7cf4e3f 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -124,8 +124,10 @@ static const struct command_function _command_functions[CMD_COUNT] = { { lvconvert_to_cachepool_CMD, lvconvert_to_pool_cmd }, { lvconvert_to_thin_with_external_CMD, lvconvert_to_thin_with_external_cmd }, { lvconvert_to_cache_with_cachevol_CMD, lvconvert_to_cache_with_cachevol_cmd }, + { lvconvert_to_cache_with_device_CMD, lvconvert_to_cache_with_cachevol_cmd }, { lvconvert_to_cache_with_cachepool_CMD, lvconvert_to_cache_with_cachepool_cmd }, { lvconvert_to_writecache_CMD, lvconvert_to_writecache_cmd }, + { lvconvert_to_writecache_with_device_CMD, lvconvert_to_writecache_cmd }, { lvconvert_swap_pool_metadata_CMD, lvconvert_swap_pool_metadata_cmd }, { lvconvert_to_thinpool_or_swap_metadata_CMD, lvconvert_to_pool_or_swap_metadata_cmd }, { lvconvert_to_cachepool_or_swap_metadata_CMD, lvconvert_to_pool_or_swap_metadata_cmd }, @@ -152,6 +154,12 @@ static const struct command_function _command_functions[CMD_COUNT] = { /* lvconvert for integrity */ { lvconvert_integrity_CMD, lvconvert_integrity_cmd }, + /* lvcreate */ + { lvcreate_and_attach_cachevol_for_cache_CMD, lvcreate_and_attach_cache_cmd }, + { lvcreate_and_attach_cachedevice_for_cache_CMD, 
lvcreate_and_attach_cache_cmd }, + { lvcreate_and_attach_cachevol_for_writecache_CMD, lvcreate_and_attach_writecache_cmd }, + { lvcreate_and_attach_cachedevice_for_writecache_CMD, lvcreate_and_attach_writecache_cmd }, + { pvscan_display_CMD, pvscan_display_cmd }, { pvscan_cache_CMD, pvscan_cache_cmd }, }; diff --git a/tools/toollib.c b/tools/toollib.c index 89b6374..eb0de55 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -1184,6 +1184,170 @@ out: return ok; } +static int _get_one_writecache_setting(struct cmd_context *cmd, struct writecache_settings *settings, + char *key, char *val, uint32_t *block_size_sectors) +{ + /* special case: block_size is not a setting but is set with the --cachesettings option */ + if (!strncmp(key, "block_size", strlen("block_size"))) { + uint32_t block_size = 0; + if (sscanf(val, "%u", &block_size) != 1) + goto_bad; + if (block_size == 512) + *block_size_sectors = 1; + else if (block_size == 4096) + *block_size_sectors = 8; + else + goto_bad; + return 1; + } + + if (!strncmp(key, "high_watermark", strlen("high_watermark"))) { + if (sscanf(val, "%llu", (unsigned long long *)&settings->high_watermark) != 1) + goto_bad; + if (settings->high_watermark > 100) + goto_bad; + settings->high_watermark_set = 1; + return 1; + } + + if (!strncmp(key, "low_watermark", strlen("low_watermark"))) { + if (sscanf(val, "%llu", (unsigned long long *)&settings->low_watermark) != 1) + goto_bad; + if (settings->low_watermark > 100) + goto_bad; + settings->low_watermark_set = 1; + return 1; + } + + if (!strncmp(key, "writeback_jobs", strlen("writeback_jobs"))) { + if (sscanf(val, "%llu", (unsigned long long *)&settings->writeback_jobs) != 1) + goto_bad; + settings->writeback_jobs_set = 1; + return 1; + } + + if (!strncmp(key, "autocommit_blocks", strlen("autocommit_blocks"))) { + if (sscanf(val, "%llu", (unsigned long long *)&settings->autocommit_blocks) != 1) + goto_bad; + settings->autocommit_blocks_set = 1; + return 1; + } + + if (!strncmp(key, 
"autocommit_time", strlen("autocommit_time"))) { + if (sscanf(val, "%llu", (unsigned long long *)&settings->autocommit_time) != 1) + goto_bad; + settings->autocommit_time_set = 1; + return 1; + } + + if (!strncmp(key, "fua", strlen("fua"))) { + if (settings->nofua_set) { + log_error("Setting fua and nofua cannot both be set."); + return 0; + } + if (sscanf(val, "%u", &settings->fua) != 1) + goto_bad; + settings->fua_set = 1; + return 1; + } + + if (!strncmp(key, "nofua", strlen("nofua"))) { + if (settings->fua_set) { + log_error("Setting fua and nofua cannot both be set."); + return 0; + } + if (sscanf(val, "%u", &settings->nofua) != 1) + goto_bad; + settings->nofua_set = 1; + return 1; + } + + if (!strncmp(key, "cleaner", strlen("cleaner"))) { + if (sscanf(val, "%u", &settings->cleaner) != 1) + goto_bad; + settings->cleaner_set = 1; + return 1; + } + + if (!strncmp(key, "max_age", strlen("max_age"))) { + if (sscanf(val, "%u", &settings->max_age) != 1) + goto_bad; + settings->max_age_set = 1; + return 1; + } + + if (settings->new_key) { + log_error("Setting %s is not recognized. Only one unrecognized setting is allowed.", key); + return 0; + } + + log_warn("Unrecognized writecache setting \"%s\" may cause activation failure.", key); + if (yes_no_prompt("Use unrecognized writecache setting? [y/n]: ") == 'n') { + log_error("Aborting writecache conversion."); + return 0; + } + + log_warn("Using unrecognized writecache setting: %s = %s.", key, val); + + settings->new_key = dm_pool_strdup(cmd->mem, key); + settings->new_val = dm_pool_strdup(cmd->mem, val); + return 1; + + bad: + log_error("Invalid setting: %s", key); + return 0; +} + +int get_writecache_settings(struct cmd_context *cmd, struct writecache_settings *settings, + uint32_t *block_size_sectors) +{ + struct arg_value_group_list *group; + const char *str; + char key[64]; + char val[64]; + int num; + int pos; + + /* + * "grouped" means that multiple --cachesettings options can be used. 
+ * Each option is also allowed to contain multiple key = val pairs. + */ + + dm_list_iterate_items(group, &cmd->arg_value_groups) { + if (!grouped_arg_is_set(group->arg_values, cachesettings_ARG)) + continue; + + if (!(str = grouped_arg_str_value(group->arg_values, cachesettings_ARG, NULL))) + break; + + pos = 0; + + while (pos < strlen(str)) { + /* scan for "key1=val1 key2 = val2 key3= val3" */ + + memset(key, 0, sizeof(key)); + memset(val, 0, sizeof(val)); + + if (sscanf(str + pos, " %63[^=]=%63s %n", key, val, &num) != 2) { + log_error("Invalid setting at: %s", str+pos); + return 0; + } + + pos += num; + + if (!_get_one_writecache_setting(cmd, settings, key, val, block_size_sectors)) + return_0; + } + } + + if (settings->high_watermark_set && settings->low_watermark_set && + (settings->high_watermark <= settings->low_watermark)) { + log_error("High watermark must be greater than low watermark."); + return 0; + } + + return 1; +} /* FIXME move to lib */ static int _pv_change_tag(struct physical_volume *pv, const char *tag, int addtag) diff --git a/tools/toollib.h b/tools/toollib.h index 53a5e5b..f3a60fb 100644 --- a/tools/toollib.h +++ b/tools/toollib.h @@ -217,6 +217,9 @@ int get_cache_params(struct cmd_context *cmd, const char **name, struct dm_config_tree **settings); +int get_writecache_settings(struct cmd_context *cmd, struct writecache_settings *settings, + uint32_t *block_size_sectors); + int change_tag(struct cmd_context *cmd, struct volume_group *vg, struct logical_volume *lv, struct physical_volume *pv, int arg); diff --git a/tools/tools.h b/tools/tools.h index 7f2434d..c3d780d 100644 --- a/tools/tools.h +++ b/tools/tools.h @@ -278,7 +278,18 @@ int lvconvert_to_vdopool_param_cmd(struct cmd_context *cmd, int argc, char **arg int lvconvert_integrity_cmd(struct cmd_context *cmd, int argc, char **argv); +int lvcreate_and_attach_writecache_cmd(struct cmd_context *cmd, int argc, char **argv); +int lvcreate_and_attach_cache_cmd(struct cmd_context *cmd, int 
argc, char **argv); + int pvscan_display_cmd(struct cmd_context *cmd, int argc, char **argv); int pvscan_cache_cmd(struct cmd_context *cmd, int argc, char **argv); + +int lvconvert_writecache_attach_single(struct cmd_context *cmd, + struct logical_volume *lv, + struct processing_handle *handle); +int lvconvert_cachevol_attach_single(struct cmd_context *cmd, + struct logical_volume *lv, + struct processing_handle *handle); + #endif -- 1.8.3.1