From c479153d77b419a6cae4551b63d2b73096c1130e Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 18 Jul 2016 19:04:43 +0200 Subject: [PATCH 1/3] maint: sort.c: deduplicate code for traversing numbers * src/sort.c (traverse_raw_number): New function for traversing numbers. (find_unit_order): Use traverse_raw_number() instead of open-coding it. (debug_key): Likewise. --- src/sort.c | 63 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/src/sort.c b/src/sort.c index 5b02343..e28bb6c 100644 --- a/src/sort.c +++ b/src/sort.c @@ -2231,18 +2231,16 @@ static char const unit_order[UCHAR_LIM] = #endif }; -/* Return an integer that represents the order of magnitude of the - unit following the number. The number may contain thousands - separators and a decimal point, but it may not contain leading blanks. - Negative numbers get negative orders; zero numbers have a zero order. */ - -static int _GL_ATTRIBUTE_PURE -find_unit_order (char const *number) +/* Traverse number given as *number consisting of digits, thousands_sep, and + decimal_point chars only. Returns the highest digit found in the number, + or '\0' if no digit has been found. Upon return *number points at the + character that immediately follows after the given number. */ +static unsigned char +traverse_raw_number (char const **number) { - bool minus_sign = (*number == '-'); - char const *p = number + minus_sign; - int nonzero = 0; + char const *p = *number; unsigned char ch; + unsigned char max_digit = '\0'; /* Scan to end of number. Decimals or separators not followed by digits stop the scan. @@ -2253,16 +2251,34 @@ find_unit_order (char const *number) do { while (ISDIGIT (ch = *p++)) - nonzero |= ch - '0'; + if (max_digit < ch) + max_digit = ch; } while (ch == thousands_sep); if (ch == decimal_point) while (ISDIGIT (ch = *p++)) - nonzero |= ch - '0'; + if (max_digit < ch) + max_digit = ch; + + *number = p - 1; + return max_digit; +} + +/* Return an integer that represents the order of magnitude of the + unit following the number. The number may contain thousands + separators and a decimal point, but it may not contain leading blanks. + Negative numbers get negative orders; zero numbers have a zero order. */ - if (nonzero) +static int _GL_ATTRIBUTE_PURE +find_unit_order (char const *number) +{ + bool minus_sign = (*number == '-'); + char const *p = number + minus_sign; + unsigned char max_digit = traverse_raw_number (&p); + if ('0' < max_digit) { + unsigned char ch = *p; int order = unit_order[ch]; return (minus_sign ? -order : order); } @@ -2655,23 +2671,14 @@ debug_key (struct line const *line, struct keyfield const *key) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) { - char *p = beg + (beg < lim && *beg == '-'); - bool found_digit = false; - unsigned char ch; - - do + char const *p = beg + (beg < lim && *beg == '-'); + unsigned char max_digit = traverse_raw_number (&p); + if ('0' <= max_digit) { - while (ISDIGIT (ch = *p++)) - found_digit = true; + unsigned char ch = *p; + tighter_lim = (char *) p + + (key->human_numeric && unit_order[ch]); } - while (ch == thousands_sep); - - if (ch == decimal_point) - while (ISDIGIT (ch = *p++)) - found_digit = true; - - if (found_digit) - tighter_lim = p - ! (key->human_numeric && unit_order[ch]); } else tighter_lim = lim; -- 2.5.5 From 8c39465a5b0343ff7a21286dd69ed5430685d2f7 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 18 Jul 2016 19:04:44 +0200 Subject: [PATCH 2/3] sort: make -h work with -k and blank used as thousands separator * src/sort.c (traverse_raw_number): Allow to skip only one occurrence of thousands_sep to avoid finding the unit in the next column in case thousands_sep matches as blank and is used as column delimiter. * tests/misc/sort-h-thousands-sep.sh: Add regression test for this bug. * tests/local.mk: Reference the test. * NEWS: Mention the bug fix. Reported at https://bugzilla.redhat.com/1355780 Fixes http://bugs.gnu.org/24015 --- src/sort.c | 14 ++++++++---- tests/local.mk | 1 + tests/misc/sort-h-thousands-sep.sh | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100755 tests/misc/sort-h-thousands-sep.sh diff --git a/src/sort.c b/src/sort.c index e28bb6c..dd3ba58 100644 --- a/src/sort.c +++ b/src/sort.c @@ -2248,13 +2248,17 @@ traverse_raw_number (char const **number) to be lacking in units. FIXME: add support for multibyte thousands_sep and decimal_point. */ - do + while (ISDIGIT (ch = *p++)) { - while (ISDIGIT (ch = *p++)) - if (max_digit < ch) - max_digit = ch; + if (max_digit < ch) + max_digit = ch; + + /* Allow to skip only one occurrence of thousands_sep to avoid finding + the unit in the next column in case thousands_sep matches as blank + and is used as column delimiter. */ + if (*p == thousands_sep) + ++p; } - while (ch == thousands_sep); if (ch == decimal_point) while (ISDIGIT (ch = *p++)) diff --git a/tests/local.mk b/tests/local.mk index 42d39f2..dccff8d 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -344,6 +344,7 @@ all_tests = \ tests/misc/sort-discrim.sh \ tests/misc/sort-files0-from.pl \ tests/misc/sort-float.sh \ + tests/misc/sort-h-thousands-sep.sh \ tests/misc/sort-mb-tests.sh \ tests/i18n/sort.sh \ tests/misc/sort-merge.pl \ diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh new file mode 100755 index 0000000..17f1b6c --- /dev/null +++ b/tests/misc/sort-h-thousands-sep.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# exercise 'sort -h' in locales where thousands separator is blank + +# Copyright (C) 2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ sort +test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \ + || skip_ 'The Swedish locale with blank thousands separator is unavailable.' + +tee exp1 > in << _EOF_ +1 1k 4 003 1M +2k 2M 4 002 2 +3M 3 4 001 3k +_EOF_ + +cat > exp2 << _EOF_ +3M 3 4 001 3k +1 1k 4 003 1M +2k 2M 4 002 2 +_EOF_ + +cat > exp3 << _EOF_ +3M 3 4 001 3k +2k 2M 4 002 2 +1 1k 4 003 1M +_EOF_ + +for i in 1 2 3; do + LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1 + compare "exp${i}" "out${i}" || fail=1 +done + +Exit $fail -- 2.5.5 From 46ef53f558e7bc1c0bc0abd62a86b40b4141e058 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 18 Jul 2016 19:04:45 +0200 Subject: [PATCH 3/3] sort: with -h, disallow thousands separator between number and unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * src/sort.c (traverse_raw_number): Accept thousands separator only if it is immediately followed by a digit. * tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug. Suggested by Pádraig Brady in http://bugs.gnu.org/24015 --- src/sort.c | 11 ++++++++++- tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/sort.c b/src/sort.c index dd3ba58..69ef75f 100644 --- a/src/sort.c +++ b/src/sort.c @@ -2241,6 +2241,7 @@ traverse_raw_number (char const **number) char const *p = *number; unsigned char ch; unsigned char max_digit = '\0'; + bool ends_with_thousands_sep = false; /* Scan to end of number. Decimals or separators not followed by digits stop the scan. @@ -2256,10 +2257,18 @@ traverse_raw_number (char const **number) /* Allow to skip only one occurrence of thousands_sep to avoid finding the unit in the next column in case thousands_sep matches as blank and is used as column delimiter. */ - if (*p == thousands_sep) + ends_with_thousands_sep = (*p == thousands_sep); + if (ends_with_thousands_sep) ++p; } + if (ends_with_thousands_sep) + { + /* thousands_sep not followed by digit is not allowed. */ + *number = p - 2; + return max_digit; + } + if (ch == decimal_point) while (ISDIGIT (ch = *p++)) if (max_digit < ch) diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh index 17f1b6c..3ffa89e 100755 --- a/tests/misc/sort-h-thousands-sep.sh +++ b/tests/misc/sort-h-thousands-sep.sh @@ -18,28 +18,29 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ sort + test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \ || skip_ 'The Swedish locale with blank thousands separator is unavailable.' -tee exp1 > in << _EOF_ -1 1k 4 003 1M -2k 2M 4 002 2 -3M 3 4 001 3k +tee exp1 exp3 > in << _EOF_ +1 1k 1 M 4 003 1M +2k 2M 2 k 4 002 2 +3M 3 3 G 4 001 3k _EOF_ cat > exp2 << _EOF_ -3M 3 4 001 3k -1 1k 4 003 1M -2k 2M 4 002 2 +3M 3 3 G 4 001 3k +1 1k 1 M 4 003 1M +2k 2M 2 k 4 002 2 _EOF_ -cat > exp3 << _EOF_ -3M 3 4 001 3k -2k 2M 4 002 2 -1 1k 4 003 1M +cat > exp5 << _EOF_ +3M 3 3 G 4 001 3k +2k 2M 2 k 4 002 2 +1 1k 1 M 4 003 1M _EOF_ -for i in 1 2 3; do +for i in 1 2 3 5; do LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1 compare "exp${i}" "out${i}" || fail=1 done -- 2.5.5