WHATS_NEW | 6 +++++ configure | 20 ++++++++++++++++ configure.in | 10 ++++++++ doc/example.conf.in | 11 ++++++-- lib/config/defaults.h | 1 + lib/format_text/format-text.c | 20 +++++++++------ lib/metadata/metadata.c | 48 ++++++++++++++++++++++++++------------ lib/misc/configure.h.in | 3 ++ test/t-pvcreate-operation-md.sh | 32 ++++++++++++++++++++++++- 9 files changed, 123 insertions(+), 28 deletions(-) diff --git a/WHATS_NEW b/WHATS_NEW index c63251f..90fd9d1 100644 --- a/WHATS_NEW +++ b/WHATS_NEW @@ -1,3 +1,9 @@ + Version 2.02.74 - + ================================== + Add configure --with-default-data-alignment. + Update heuristic used for default and detected data alignment. + Add "devices/default_data_alignment" to lvm.conf. + Version 2.02.73 - 18th August 2010 ================================== Fix potential for corruption during cluster mirror device failure. diff --git a/configure b/configure index 5ecb768..0e8e7e1 100755 --- a/configure +++ b/configure @@ -694,6 +694,7 @@ DL_LIBS DEVMAPPER DEFAULT_RUN_DIR DEFAULT_LOCK_DIR +DEFAULT_DATA_ALIGNMENT DEFAULT_CACHE_SUBDIR DEFAULT_BACKUP_SUBDIR DEFAULT_ARCHIVE_SUBDIR @@ -872,6 +873,7 @@ with_default_archive_subdir with_default_backup_subdir with_default_cache_subdir with_default_locking_dir +with_default_data_alignment with_interface ' ac_precious_vars='build_alias @@ -1615,6 +1617,8 @@ Optional Packages: default metadata cache subdir [cache] --with-default-locking-dir=DIR default locking directory [/var/lock/lvm] + --with-default-data-alignment=NUM + set the default data alignment in MiB [1] --with-interface=IFACE choose kernel interface (ioctl) [ioctl] Some influential environment variables: @@ -18080,6 +18084,21 @@ _ACEOF ################################################################################ + +# Check whether --with-default-data-alignment was given. 
+if test "${with_default_data_alignment+set}" = set; then + withval=$with_default_data_alignment; DEFAULT_DATA_ALIGNMENT=$withval +else + DEFAULT_DATA_ALIGNMENT=1 +fi + + +cat >>confdefs.h <<_ACEOF +#define DEFAULT_DATA_ALIGNMENT $DEFAULT_DATA_ALIGNMENT +_ACEOF + + +################################################################################ { $as_echo "$as_me:$LINENO: checking for kernel interface choice" >&5 $as_echo_n "checking for kernel interface choice... " >&6; } @@ -18220,6 +18239,7 @@ LVM_LIBAPI=`echo "$VER" | $AWK -F '[()]' '{print $2}'` + ################################################################################ ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile doc/Makefile doc/example.conf include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/replicator/Makefile lib/misc/lvm-version.h lib/snapshot/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/lvm2_monitoring_init_red_hat scripts/Makefile test/Makefile test/api/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile" diff --git a/configure.in b/configure.in index 3c39151..18484f7 100644 --- a/configure.in +++ b/configure.in @@ -1185,6 +1185,15 @@ AC_DEFINE_UNQUOTED(DEFAULT_LOCK_DIR, ["$DEFAULT_LOCK_DIR"], [Name of default locking directory.]) ################################################################################ +dnl -- Setup default data alignment +AC_ARG_WITH(default-data-alignment, + 
AC_HELP_STRING([--with-default-data-alignment=NUM], + [set the default data alignment in MiB [[1]]]), + DEFAULT_DATA_ALIGNMENT=$withval, DEFAULT_DATA_ALIGNMENT=1) +AC_DEFINE_UNQUOTED(DEFAULT_DATA_ALIGNMENT, [$DEFAULT_DATA_ALIGNMENT], + [Default data alignment.]) + +################################################################################ dnl -- which kernel interface to use (ioctl only) AC_MSG_CHECKING(for kernel interface choice) AC_ARG_WITH(interface, @@ -1244,6 +1253,7 @@ AC_SUBST(DEFAULT_SYS_DIR) AC_SUBST(DEFAULT_ARCHIVE_SUBDIR) AC_SUBST(DEFAULT_BACKUP_SUBDIR) AC_SUBST(DEFAULT_CACHE_SUBDIR) +AC_SUBST(DEFAULT_DATA_ALIGNMENT) AC_SUBST(DEFAULT_LOCK_DIR) AC_SUBST(DEFAULT_RUN_DIR) AC_SUBST(DEVMAPPER) diff --git a/doc/example.conf.in b/doc/example.conf.in index 7edae71..17fca05 100644 --- a/doc/example.conf.in +++ b/doc/example.conf.in @@ -98,6 +98,10 @@ devices { # 1 enables; 0 disables. md_chunk_alignment = 1 + # Default alignment of the start of a data area in MiB. If set to 0, + # a value of 64KiB will be used. Set to 1 for 1MiB, 2 for 2MiB, etc. + # default_data_alignment = @DEFAULT_DATA_ALIGNMENT@ + # By default, the start of a PV's data area will be a multiple of # the 'minimum_io_size' or 'optimal_io_size' exposed in sysfs. # - minimum_io_size - the smallest request the device can perform @@ -111,9 +115,9 @@ devices { data_alignment_detection = 1 # Alignment (in KB) of start of data area when creating a new PV. - # If a PV is placed directly upon an md device and md_chunk_alignment or - # data_alignment_detection is enabled this parameter is ignored. - # Set to 0 for the default alignment of 1MB or page size, if larger. + # md_chunk_alignment and data_alignment_detection are disabled if set. + # Set to 0 for the default alignment (see: default_data_alignment) + # or page size, if larger. 
data_alignment = 0 # By default, the start of the PV's aligned data area will be shifted by @@ -122,6 +126,7 @@ devices { # windows partitioning will have an alignment_offset of 3584 bytes # (sector 7 is the lowest aligned logical block, the 4KB sectors start # at LBA -1, and consequently sector 63 is aligned on a 4KB boundary). + # But note that pvcreate --dataalignmentoffset will skip this detection. # 1 enables; 0 disables. data_alignment_offset_detection = 1 diff --git a/lib/config/defaults.h b/lib/config/defaults.h index b6308f4..e4db65e 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -17,6 +17,7 @@ #define _LVM_DEFAULTS_H #define DEFAULT_PE_ALIGN 2048 +#define DEFAULT_PE_ALIGN_OLD 128 #define DEFAULT_ARCHIVE_ENABLED 1 #define DEFAULT_BACKUP_ENABLED 1 diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index c8cf89a..db5af2a 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1861,16 +1861,20 @@ static int _text_pv_setup(const struct format_type *fmt, 0) * 2; if (set_pe_align(pv, data_alignment) != data_alignment && - data_alignment) - log_warn("WARNING: %s: Overriding data alignment to " - "%lu sectors (requested %lu sectors)", - pv_dev_name(pv), pv->pe_align, data_alignment); + data_alignment) { + log_error("%s: invalid data alignment of " + "%lu sectors (requested %lu sectors)", + pv_dev_name(pv), pv->pe_align, data_alignment); + return 0; + } if (set_pe_align_offset(pv, data_alignment_offset) != data_alignment_offset && - data_alignment_offset) - log_warn("WARNING: %s: Overriding data alignment offset to " - "%lu sectors (requested %lu sectors)", - pv_dev_name(pv), pv->pe_align_offset, data_alignment_offset); + data_alignment_offset) { + log_error("%s: invalid data alignment offset of " + "%lu sectors (requested %lu sectors)", + pv_dev_name(pv), pv->pe_align_offset, data_alignment_offset); + return 0; + } if (pv->pe_align < pv->pe_align_offset) { log_error("%s: pe_align (%lu sectors) 
must not be less " diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 6cd7793..c79f618 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -62,23 +62,38 @@ static uint32_t _vg_bad_status_bits(const struct volume_group *vg, const char _really_init[] = "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? "; -static int _alignment_overrides_default(unsigned long data_alignment) +static int _alignment_overrides_default(unsigned long data_alignment, + unsigned long default_pe_align) { - return data_alignment && (DEFAULT_PE_ALIGN % data_alignment); + return data_alignment && (default_pe_align % data_alignment); } unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment) { - unsigned long temp_pe_align; + unsigned long default_pe_align, temp_pe_align; if (pv->pe_align) goto out; - if (data_alignment) + if (data_alignment) { + /* Always use specified data_alignment */ pv->pe_align = data_alignment; + goto out; + } + + default_pe_align = find_config_tree_int(pv->fmt->cmd, + "devices/default_data_alignment", + DEFAULT_DATA_ALIGNMENT); + + if (default_pe_align) + /* align on 1 MiB multiple */ + default_pe_align *= DEFAULT_PE_ALIGN; else - pv->pe_align = MAX((DEFAULT_PE_ALIGN << SECTOR_SHIFT), - lvm_getpagesize()) >> SECTOR_SHIFT; + /* align on 64 KiB multiple (old default) */ + default_pe_align = DEFAULT_PE_ALIGN_OLD; + + pv->pe_align = MAX((default_pe_align << SECTOR_SHIFT), + lvm_getpagesize()) >> SECTOR_SHIFT; if (!pv->dev) goto out; @@ -89,8 +104,8 @@ unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignm if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment", DEFAULT_MD_CHUNK_ALIGNMENT)) { temp_pe_align = dev_md_stripe_width(pv->fmt->cmd->sysfs_dir, pv->dev); - if (_alignment_overrides_default(temp_pe_align)) - pv->pe_align = temp_pe_align; + if (_alignment_overrides_default(temp_pe_align, default_pe_align)) + pv->pe_align = MAX(pv->pe_align, 
temp_pe_align); } /* @@ -104,18 +119,18 @@ unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignm "devices/data_alignment_detection", DEFAULT_DATA_ALIGNMENT_DETECTION)) { temp_pe_align = dev_minimum_io_size(pv->fmt->cmd->sysfs_dir, pv->dev); - if (_alignment_overrides_default(temp_pe_align)) - pv->pe_align = temp_pe_align; + if (_alignment_overrides_default(temp_pe_align, default_pe_align)) + pv->pe_align = MAX(pv->pe_align, temp_pe_align); temp_pe_align = dev_optimal_io_size(pv->fmt->cmd->sysfs_dir, pv->dev); - if (_alignment_overrides_default(temp_pe_align)) - pv->pe_align = temp_pe_align; + if (_alignment_overrides_default(temp_pe_align, default_pe_align)) + pv->pe_align = MAX(pv->pe_align, temp_pe_align); } +out: log_very_verbose("%s: Setting PE alignment to %lu sectors.", dev_name(pv->dev), pv->pe_align); -out: return pv->pe_align; } @@ -125,8 +140,11 @@ unsigned long set_pe_align_offset(struct physical_volume *pv, if (pv->pe_align_offset) goto out; - if (data_alignment_offset) + if (data_alignment_offset) { + /* Always use specified data_alignment_offset */ pv->pe_align_offset = data_alignment_offset; + goto out; + } if (!pv->dev) goto out; @@ -142,10 +160,10 @@ unsigned long set_pe_align_offset(struct physical_volume *pv, pv->pe_align_offset = MAX(pv->pe_align_offset, align_offset); } +out: log_very_verbose("%s: Setting PE alignment offset to %lu sectors.", dev_name(pv->dev), pv->pe_align_offset); -out: return pv->pe_align_offset; } diff --git a/lib/misc/configure.h.in b/lib/misc/configure.h.in index 0161471..59b656d 100644 --- a/lib/misc/configure.h.in +++ b/lib/misc/configure.h.in @@ -32,6 +32,9 @@ /* Name of default metadata cache subdirectory. */ #undef DEFAULT_CACHE_SUBDIR +/* Default data alignment. */ +#undef DEFAULT_DATA_ALIGNMENT + /* Name of default locking directory. 
*/ #undef DEFAULT_LOCK_DIR diff --git a/test/t-pvcreate-operation-md.sh b/test/t-pvcreate-operation-md.sh index 41e3534..cab63a0 100644 --- a/test/t-pvcreate-operation-md.sh +++ b/test/t-pvcreate-operation-md.sh @@ -41,13 +41,17 @@ cleanup_md() { sleep 2 rm -f $mddev fi +} + +cleanup_md_and_teardown() { + cleanup_md teardown } # create 2 disk MD raid0 array (stripe_width=128K) test -b "$mddev" && exit 200 -mdadm --create $mddev --auto=md --level 0 --raid-devices=2 --chunk 64 $dev1 $dev2 -trap 'aux cleanup_md' EXIT # cleanup this MD device at the end of the test +mdadm --create --metadata=1.0 $mddev --auto=md --level 0 --raid-devices=2 --chunk 64 $dev1 $dev2 +trap 'aux cleanup_md_and_teardown' EXIT # cleanup this MD device at the end of the test test -b "$mddev" || exit 200 # Test alignment of PV on MD without any MD-aware or topology-aware detection @@ -113,3 +117,27 @@ EOF pvremove $mddev_p fi fi + +# Test newer topology-aware alignment detection w/ --dataalignment override +if [ $linux_minor -ge 33 ]; then + cleanup_md + pvcreate -f $dev1 + pvcreate -f $dev2 + + # create 2 disk MD raid0 array (stripe_width=2M) + test -b "$mddev" && exit 200 + mdadm --create --metadata=1.0 $mddev --auto=md --level 0 --raid-devices=2 --chunk 1024 $dev1 $dev2 + test -b "$mddev" || exit 200 + + # optimal_io_size=2097152, minimum_io_size=1048576 + pv_align="2.00m" + pvcreate --metadatasize 128k \ + --config 'devices { md_chunk_alignment=0 }' $mddev + check_pv_field_ $mddev pe_start $pv_align + + # now verify pe_start alignment override using --dataalignment + pv_align="192.00k" + pvcreate --dataalignment 64k --metadatasize 128k \ + --config 'devices { md_chunk_alignment=0 }' $mddev + check_pv_field_ $mddev pe_start $pv_align +fi