- Jakub's sort -t multibyte fixes (bug #147567).

This commit is contained in:
Tim Waugh 2005-02-09 13:32:52 +00:00
parent 4614621ac5
commit c790d771ae
2 changed files with 180 additions and 38 deletions

View File

@ -1351,8 +1351,8 @@
/* We've just printed some files and need to clean up things before /* We've just printed some files and need to clean up things before
looking for more options and printing the next batch of files. looking for more options and printing the next batch of files.
--- coreutils-5.1.3/src/sort.c 2004-02-16 15:36:40.000000000 +0000 --- coreutils-5.2.1/src/sort.c 2004-12-15 14:10:01.347312694 +0000
+++ coreutils-5.2.1/src/sort.c 2004-12-15 14:10:01.347312694 +0000 +++ coreutils-5.2.1/src/sort.c 2005-02-09 07:34:40.000000000 -0500
@@ -23,10 +23,31 @@ @@ -23,10 +23,31 @@
#include <config.h> #include <config.h>
@ -1440,17 +1440,24 @@
/* The kind of blanks for '-b' to skip in various options. */ /* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both }; enum blanktype { bl_start, bl_end, bl_both };
@@ -251,7 +306,8 @@ @@ -245,13 +300,11 @@
/* Tab character separating fields. If TAB_DEFAULT, then fields are they were read if all keys compare equal. */
static bool stable;
-/* If TAB has this value, blanks separate fields. */
-enum { TAB_DEFAULT = CHAR_MAX + 1 };
-
-/* Tab character separating fields. If TAB_DEFAULT, then fields are
+/* Tab character separating fields. If tab_length is 0, then fields are
separated by the empty string between a non-blank character and a blank separated by the empty string between a non-blank character and a blank
character. */ character. */
-static int tab = TAB_DEFAULT; -static int tab = TAB_DEFAULT;
+static int tab[MB_LEN_MAX + 1] = { TAB_DEFAULT }; +static char tab[MB_LEN_MAX + 1];
+static size_t tab_length = 1; +static size_t tab_length = 0;
/* Flag to remove consecutive duplicate lines from the output. /* Flag to remove consecutive duplicate lines from the output.
Only the last of a sequence of equal lines will be output. */ Only the last of a sequence of equal lines will be output. */
@@ -384,6 +440,46 @@ @@ -384,6 +437,46 @@
}; };
static struct tempnode *volatile temphead; static struct tempnode *volatile temphead;
@ -1497,7 +1504,7 @@
/* Clean up any remaining temporary files. */ /* Clean up any remaining temporary files. */
static void static void
@@ -521,7 +617,7 @@ @@ -521,7 +614,7 @@
} }
} }
@ -1506,7 +1513,7 @@
static int static int
struct_month_cmp (const void *m1, const void *m2) struct_month_cmp (const void *m1, const void *m2)
@@ -536,7 +632,7 @@ @@ -536,7 +629,7 @@
/* Initialize the character class tables. */ /* Initialize the character class tables. */
static void static void
@ -1515,7 +1522,7 @@
{ {
int i; int i;
@@ -574,6 +670,64 @@ @@ -574,6 +667,64 @@
#endif #endif
} }
@ -1580,7 +1587,7 @@
/* Specify the amount of main memory to use when sorting. */ /* Specify the amount of main memory to use when sorting. */
static void static void
specify_sort_size (char const *s) specify_sort_size (char const *s)
@@ -784,7 +938,7 @@ @@ -784,7 +935,7 @@
by KEY in LINE. */ by KEY in LINE. */
static char * static char *
@ -1589,12 +1596,12 @@
{ {
register char *ptr = line->text, *lim = ptr + line->length - 1; register char *ptr = line->text, *lim = ptr + line->length - 1;
register size_t sword = key->sword; register size_t sword = key->sword;
@@ -794,10 +948,10 @@ @@ -794,10 +945,10 @@
/* The leading field separator itself is included in a field when -t /* The leading field separator itself is included in a field when -t
is absent. */ is absent. */
- if (tab != TAB_DEFAULT) - if (tab != TAB_DEFAULT)
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
while (ptr < lim && sword--) while (ptr < lim && sword--)
{ {
- while (ptr < lim && *ptr != tab) - while (ptr < lim && *ptr != tab)
@ -1602,7 +1609,7 @@
++ptr; ++ptr;
if (ptr < lim) if (ptr < lim)
++ptr; ++ptr;
@@ -825,11 +979,70 @@ @@ -825,11 +976,70 @@
return ptr; return ptr;
} }
@ -1619,7 +1626,7 @@
+ +
+ memset (&state, '\0', sizeof(mbstate_t)); + memset (&state, '\0', sizeof(mbstate_t));
+ +
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
+ while (ptr < lim && sword--) + while (ptr < lim && sword--)
+ { + {
+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
@ -1674,12 +1681,12 @@
{ {
register char *ptr = line->text, *lim = ptr + line->length - 1; register char *ptr = line->text, *lim = ptr + line->length - 1;
register size_t eword = key->eword, echar = key->echar; register size_t eword = key->eword, echar = key->echar;
@@ -842,10 +1055,10 @@ @@ -842,10 +1052,10 @@
`beginning' is the first character following the delimiting TAB. `beginning' is the first character following the delimiting TAB.
Otherwise, leave PTR pointing at the first `blank' character after Otherwise, leave PTR pointing at the first `blank' character after
the preceding field. */ the preceding field. */
- if (tab != TAB_DEFAULT) - if (tab != TAB_DEFAULT)
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
while (ptr < lim && eword--) while (ptr < lim && eword--)
{ {
- while (ptr < lim && *ptr != tab) - while (ptr < lim && *ptr != tab)
@ -1687,12 +1694,12 @@
++ptr; ++ptr;
if (ptr < lim && (eword | echar)) if (ptr < lim && (eword | echar))
++ptr; ++ptr;
@@ -891,10 +1104,10 @@ @@ -891,10 +1101,10 @@
*/ */
/* Make LIM point to the end of (one byte past) the current field. */ /* Make LIM point to the end of (one byte past) the current field. */
- if (tab != TAB_DEFAULT) - if (tab != TAB_DEFAULT)
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
{ {
char *newlim; char *newlim;
- newlim = memchr (ptr, tab, lim - ptr); - newlim = memchr (ptr, tab, lim - ptr);
@ -1700,7 +1707,7 @@
if (newlim) if (newlim)
lim = newlim; lim = newlim;
} }
@@ -926,15 +1139,137 @@ @@ -926,15 +1136,137 @@
return ptr; return ptr;
} }
@ -1716,7 +1723,7 @@
+ +
+ memset (&state, '\0', sizeof(mbstate_t)); + memset (&state, '\0', sizeof(mbstate_t));
+ +
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
+ while (ptr < lim && eword--) + while (ptr < lim && eword--)
+ { + {
+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
@ -1747,7 +1754,7 @@
+ +
+# ifdef POSIX_UNSPECIFIED +# ifdef POSIX_UNSPECIFIED
+ /* Make LIM point to the end of (one byte past) the current field. */ + /* Make LIM point to the end of (one byte past) the current field. */
+ if (tab[0] != TAB_DEFAULT) + if (tab_length)
+ { + {
+ char *newlim, *p; + char *newlim, *p;
+ +
@ -1842,7 +1849,7 @@
} }
/* Fill BUF reading from FP, moving buf->left bytes from the end /* Fill BUF reading from FP, moving buf->left bytes from the end
@@ -1019,8 +1354,22 @@ @@ -1019,8 +1351,22 @@
else else
{ {
if (key->skipsblanks) if (key->skipsblanks)
@ -1867,7 +1874,7 @@
line->keybeg = line_start; line->keybeg = line_start;
} }
if (key->skipeblanks) if (key->skipeblanks)
@@ -1128,13 +1477,32 @@ @@ -1128,13 +1474,32 @@
register int tmpa, tmpb, tmp; register int tmpa, tmpb, tmp;
register size_t log_a, log_b; register size_t log_a, log_b;
@ -1906,7 +1913,7 @@
if (tmpa == NEGATION_SIGN) if (tmpa == NEGATION_SIGN)
{ {
@@ -1268,15 +1636,60 @@ @@ -1268,15 +1633,60 @@
/* FIXME: maybe add option to try expensive FP conversion /* FIXME: maybe add option to try expensive FP conversion
only if A and B can't be compared more cheaply/accurately. */ only if A and B can't be compared more cheaply/accurately. */
@ -1974,7 +1981,7 @@
return 1; return 1;
/* Sort numbers in the usual way, where -0 == +0. Put NaNs after /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
@@ -1294,7 +1707,7 @@ @@ -1294,7 +1704,7 @@
Return 0 if the name in S is not recognized. */ Return 0 if the name in S is not recognized. */
static int static int
@ -1983,7 +1990,7 @@
{ {
char *month; char *month;
register size_t i; register size_t i;
@@ -1332,11 +1745,79 @@ @@ -1332,11 +1742,79 @@
return result; return result;
} }
@ -2064,7 +2071,7 @@
{ {
struct keyfield const *key = keylist; struct keyfield const *key = keylist;
@@ -1507,6 +1988,187 @@ @@ -1507,6 +1985,187 @@
return key->reverse ? -diff : diff; return key->reverse ? -diff : diff;
} }
@ -2252,7 +2259,7 @@
/* Compare two lines A and B, returning negative, zero, or positive /* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */ depending on whether A compares less than, equal to, or greater than B. */
@@ -2252,20 +2914,44 @@ @@ -2252,20 +2911,44 @@
{ {
struct lconv const *lconvp = localeconv (); struct lconv const *lconvp = localeconv ();
@ -2302,13 +2309,14 @@
have_read_stdin = false; have_read_stdin = false;
inittables (); inittables ();
@@ -2462,13 +3148,47 @@ @@ -2462,13 +3145,47 @@
case 't': case 't':
{ {
- int newtab = optarg[0]; - int newtab = optarg[0];
- if (! newtab) - if (! newtab)
+ char newtab[MB_LEN_MAX + 1]; + char newtab[MB_LEN_MAX + 1];
+ size_t newtab_length = 1;
+ strncpy (newtab, optarg, MB_LEN_MAX); + strncpy (newtab, optarg, MB_LEN_MAX);
+ if (! newtab[0]) + if (! newtab[0])
error (SORT_FAILURE, 0, _("empty tab")); error (SORT_FAILURE, 0, _("empty tab"));
@ -2317,7 +2325,7 @@
+ { + {
+ wchar_t wc; + wchar_t wc;
+ mbstate_t state; + mbstate_t state;
+ size_t newtab_length, i; + size_t i;
+ +
+ memset (&state, '\0', sizeof (mbstate_t)); + memset (&state, '\0', sizeof (mbstate_t));
+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, MB_LEN_MAX), &state); + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, MB_LEN_MAX), &state);
@ -2331,6 +2339,8 @@
+ +
+ if (optarg[newtab_length]) + if (optarg[newtab_length])
+ { + {
+ if (strcmp (optarg, "\\0") == 0)
+ newtab[0] = '\0';
+ /* Provoke with `sort -txx'. Complain about + /* Provoke with `sort -txx'. Complain about
+ "multi-character tab" instead of "multibyte tab", so + "multi-character tab" instead of "multibyte tab", so
+ that the diagnostic's wording does not need to be + that the diagnostic's wording does not need to be
@ -2338,9 +2348,6 @@
+ error (SORT_FAILURE, 0, _("multi-character tab `%s'"), + error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
+ optarg); + optarg);
+ } + }
+
+ for (i = 0; i < newtab_length; i++)
+ tab[i] = newtab[i];
+ } + }
+ else + else
+#endif +#endif
@ -2353,15 +2360,18 @@
else else
{ {
/* Provoke with `sort -txx'. Complain about /* Provoke with `sort -txx'. Complain about
@@ -2479,9 +3199,9 @@ @@ -2479,9 +3196,12 @@
optarg); optarg);
} }
} }
- if (tab != TAB_DEFAULT && tab != newtab) - if (tab != TAB_DEFAULT && tab != newtab)
+ if (tab[0] != TAB_DEFAULT && tab[0] != newtab[0]) + if (tab_length
+ && (tab_length != newtab_length
+ || memcmp (tab, newtab, tab_length) != 0))
error (SORT_FAILURE, 0, _("incompatible tabs")); error (SORT_FAILURE, 0, _("incompatible tabs"));
- tab = newtab; - tab = newtab;
+ tab[0] = newtab[0]; + memcpy (tab, newtab, newtab_length);
+ tab_length = newtab_length;
} }
break; break;
@ -4095,3 +4105,130 @@
if (have_read_stdin && fclose (stdin) == EOF) if (have_read_stdin && fclose (stdin) == EOF)
error (EXIT_FAILURE, errno, "-"); error (EXIT_FAILURE, errno, "-");
--- coreutils-5.2.1/tests/sort/sort-mb-tests.jj 2005-02-09 07:34:40.000000000 -0500
+++ coreutils-5.2.1/tests/sort/sort-mb-tests 2005-02-09 07:34:40.000000000 -0500
@@ -0,0 +1,58 @@
+#! /bin/sh
+case $# in
+  0) xx='../../src/sort';;
+  *) xx="$1";;
+esac
+test "$VERBOSE" && echo=echo || echo=:
+$echo testing program: $xx
+errors=0
+test "$srcdir" || srcdir=.
+test "$VERBOSE" && $xx --version 2> /dev/null
+# Multibyte -t separator tests: need a UTF-8 locale, else exit 77 (Automake "skipped").
+export LC_ALL=en_US.UTF-8
+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
+
+# mb1: numeric sort on field 2, fields separated by U+FF20 (fullwidth @).
+$xx -t ＠ -k 2 -n mb1.I > mb1.O
+code=$?
+if test $code != 0; then
+  $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
+  errors=`expr $errors + 1`
+else
+  cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
+  case $? in
+    0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
+    1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
+       (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
+       errors=`expr $errors + 1`;;
+    2) $echo "Test mb1 may have failed." 1>&2
+       $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
+       errors=`expr $errors + 1`;;
+  esac
+fi
+# mb2: numeric sort on field 4, same multibyte separator.
+$xx -t ＠ -k 4 -n mb2.I > mb2.O
+code=$?
+if test $code != 0; then
+  $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
+  errors=`expr $errors + 1`
+else
+  cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
+  case $? in
+    0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
+    1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
+       (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
+       errors=`expr $errors + 1`;;
+    2) $echo "Test mb2 may have failed." 1>&2
+       $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
+       errors=`expr $errors + 1`;;
+  esac
+fi
+# Report the outcome, then collapse any failure count to exit status 1.
+if test $errors = 0; then
+  $echo Passed all 2 tests. 1>&2
+else
+  $echo Failed $errors tests. 1>&2
+fi
+test $errors = 0 || errors=1
+exit $errors
--- coreutils-5.2.1/tests/sort/mb1.I.jj 2005-02-09 07:34:40.000000000 -0500
+++ coreutils-5.2.1/tests/sort/mb1.I 2005-02-09 07:34:40.000000000 -0500
@@ -0,0 +1,4 @@
+Apple＠10
+Banana＠5
+Citrus＠20
+Cherry＠30
--- coreutils-5.2.1/tests/sort/mb2.I.jj 2005-02-09 07:34:40.000000000 -0500
+++ coreutils-5.2.1/tests/sort/mb2.I 2005-02-09 07:34:40.000000000 -0500
@@ -0,0 +1,4 @@
+Apple＠10＠＠20
+Banana＠5＠＠30
+Citrus＠20＠＠5
+Cherry＠30＠＠10
--- coreutils-5.2.1/tests/sort/mb1.X.jj 2005-02-09 07:34:40.000000000 -0500
+++ coreutils-5.2.1/tests/sort/mb1.X 2005-02-09 07:34:40.000000000 -0500
@@ -0,0 +1,4 @@
+Banana＠5
+Apple＠10
+Citrus＠20
+Cherry＠30
--- coreutils-5.2.1/tests/sort/mb2.X.jj 2005-02-09 07:34:40.000000000 -0500
+++ coreutils-5.2.1/tests/sort/mb2.X 2005-02-09 07:34:40.000000000 -0500
@@ -0,0 +1,4 @@
+Citrus＠20＠＠5
+Cherry＠30＠＠10
+Apple＠10＠＠20
+Banana＠5＠＠30
--- coreutils-5.2.1/tests/sort/Makefile.am.jj 2004-02-11 06:54:14.000000000 -0500
+++ coreutils-5.2.1/tests/sort/Makefile.am 2005-02-09 07:36:20.000000000 -0500
@@ -43,12 +43,14 @@ o-no-file1.E create-empty.O create-empty
nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
##test-files-end
-EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
-noinst_SCRIPTS = $x-tests
+run_gen += mb1.O mb2.O
+
+EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
+noinst_SCRIPTS = $x-tests $x-mb-tests
editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
-TESTS = $x-tests
+TESTS = $x-tests $x-mb-tests
mk_script = $(srcdir)/../mk-script
$(srcdir)/$x-tests: $(mk_script) Test.pm
--- coreutils-5.2.1/tests/sort/Makefile.in.jj 2004-03-11 03:58:06.000000000 -0500
+++ coreutils-5.2.1/tests/sort/Makefile.in 2005-02-09 07:36:50.000000000 -0500
@@ -301,10 +301,12 @@ n10b.E n11a.O n11a.E n11b.O n11b.E 01a.O
o-no-file1.E create-empty.O create-empty.E neg-nls.O neg-nls.E nul-nls.O \
nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
+run_gen += mb1.O mb2.O
+
-EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
+EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
-noinst_SCRIPTS = $x-tests
+noinst_SCRIPTS = $x-tests $x-mb-tests
editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
-TESTS = $x-tests
+TESTS = $x-tests $x-mb-tests
mk_script = $(srcdir)/../mk-script
MAINTAINERCLEANFILES = $x-tests $(maint_gen)
CLEANFILES = $(run_gen)

View File

@ -4,7 +4,7 @@
Summary: The GNU core utilities: a set of tools commonly used in shell scripts Summary: The GNU core utilities: a set of tools commonly used in shell scripts
Name: coreutils Name: coreutils
Version: 5.2.1 Version: 5.2.1
Release: 40 Release: 41
License: GPL License: GPL
Group: System Environment/Base Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/ Url: http://www.gnu.org/software/coreutils/
@ -122,6 +122,8 @@ the old GNU fileutils, sh-utils, and textutils packages.
# (bug #102033). # (bug #102033).
perl -pi -e 's/basic-1//g' tests/stty/Makefile* perl -pi -e 's/basic-1//g' tests/stty/Makefile*
chmod a+x tests/sort/sort-mb-tests
%build %build
%ifarch s390 s390x %ifarch s390 s390x
export CFLAGS="$RPM_OPT_FLAGS -fPIC" export CFLAGS="$RPM_OPT_FLAGS -fPIC"
@ -249,6 +251,9 @@ fi
/sbin/runuser /sbin/runuser
%changelog %changelog
* Wed Feb 9 2005 Tim Waugh <twaugh@redhat.com> 5.2.1-41
- Jakub's sort -t multibyte fixes (bug #147567).
* Sat Feb 5 2005 Tim Waugh <twaugh@redhat.com> 5.2.1-40 * Sat Feb 5 2005 Tim Waugh <twaugh@redhat.com> 5.2.1-40
- Undo last change (bug #145266). - Undo last change (bug #145266).