improve i18n support in sort

the misc/sort-debug-keys test is now re-enabled
This commit is contained in:
Kamil Dudka 2010-10-26 18:53:51 +02:00
parent eed90449e8
commit bd229edf8d
2 changed files with 72 additions and 67 deletions

View File

@ -1,3 +1,15 @@
lib/linebuffer.h | 8 +
src/cut.c | 420 ++++++++++++++++++++++++++++++--
src/expand.c | 160 ++++++++++++-
src/fold.c | 309 +++++++++++++++++++++--
src/join.c | 347 +++++++++++++++++++++++----
src/pr.c | 431 +++++++++++++++++++++++++++++---
src/sort.c | 704 ++++++++++++++++++++++++++++++++++++++++++++++++++---
src/unexpand.c | 226 +++++++++++++++++-
src/uniq.c | 259 +++++++++++++++++++-
tests/Makefile.am | 5 +
10 files changed, 2689 insertions(+), 180 deletions(-)
diff -urNp coreutils-8.6-orig/lib/linebuffer.h coreutils-8.6/lib/linebuffer.h diff -urNp coreutils-8.6-orig/lib/linebuffer.h coreutils-8.6/lib/linebuffer.h
--- coreutils-8.6-orig/lib/linebuffer.h 2010-06-10 18:45:26.000000000 +0200 --- coreutils-8.6-orig/lib/linebuffer.h 2010-06-10 18:45:26.000000000 +0200
+++ coreutils-8.6/lib/linebuffer.h 2010-10-18 15:18:11.932209034 +0200 +++ coreutils-8.6/lib/linebuffer.h 2010-10-18 15:18:11.932209034 +0200
@ -2417,9 +2429,10 @@ diff -urNp coreutils-8.6-orig/src/pr.c coreutils-8.6/src/pr.c
/* We've just printed some files and need to clean up things before /* We've just printed some files and need to clean up things before
looking for more options and printing the next batch of files. looking for more options and printing the next batch of files.
diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c diff --git a/src/sort.c b/src/sort.c
--- coreutils-8.6-orig/src/sort.c 2010-10-14 11:39:14.000000000 +0200 index 7e25f6a..d3f8915 100644
+++ coreutils-8.6/src/sort.c 2010-10-18 15:16:14.976458929 +0200 --- a/src/sort.c
+++ b/src/sort.c
@@ -22,11 +22,20 @@ @@ -22,11 +22,20 @@
#include <config.h> #include <config.h>
@ -2498,7 +2511,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Flag to remove consecutive duplicate lines from the output. /* Flag to remove consecutive duplicate lines from the output.
Only the last of a sequence of equal lines will be output. */ Only the last of a sequence of equal lines will be output. */
@@ -782,6 +813,44 @@ reap_some (void) @@ -782,6 +813,46 @@ reap_some (void)
update_proc (pid); update_proc (pid);
} }
@ -2509,6 +2522,8 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+(*begfield) (const struct line*, const struct keyfield *); +(*begfield) (const struct line*, const struct keyfield *);
+static char * +static char *
+(*limfield) (const struct line*, const struct keyfield *); +(*limfield) (const struct line*, const struct keyfield *);
+static void
+(*skipblanks) (const char **ptr, const char *lim);
+static int +static int
+(*getmonth) (char const *, size_t, char **); +(*getmonth) (char const *, size_t, char **);
+static int +static int
@ -2543,7 +2558,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Clean up any remaining temporary files. */ /* Clean up any remaining temporary files. */
static void static void
@@ -1205,7 +1274,7 @@ zaptemp (char const *name) @@ -1205,7 +1276,7 @@ zaptemp (char const *name)
free (node); free (node);
} }
@ -2552,7 +2567,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
static int static int
struct_month_cmp (void const *m1, void const *m2) struct_month_cmp (void const *m1, void const *m2)
@@ -1220,7 +1289,7 @@ struct_month_cmp (void const *m1, void c @@ -1220,7 +1291,7 @@ struct_month_cmp (void const *m1, void const *m2)
/* Initialize the character class tables. */ /* Initialize the character class tables. */
static void static void
@ -2561,7 +2576,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
size_t i; size_t i;
@@ -1232,7 +1301,7 @@ inittables (void) @@ -1232,7 +1303,7 @@ inittables (void)
fold_toupper[i] = toupper (i); fold_toupper[i] = toupper (i);
} }
@ -2570,7 +2585,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* If we're not in the "C" locale, read different names for months. */ /* If we're not in the "C" locale, read different names for months. */
if (hard_LC_TIME) if (hard_LC_TIME)
{ {
@@ -1314,6 +1383,64 @@ specify_nmerge (int oi, char c, char con @@ -1314,6 +1385,64 @@ specify_nmerge (int oi, char c, char const *s)
xstrtol_fatal (e, oi, c, long_options, s); xstrtol_fatal (e, oi, c, long_options, s);
} }
@ -2635,7 +2650,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Specify the amount of main memory to use when sorting. */ /* Specify the amount of main memory to use when sorting. */
static void static void
specify_sort_size (int oi, char c, char const *s) specify_sort_size (int oi, char c, char const *s)
@@ -1540,7 +1667,7 @@ buffer_linelim (struct buffer const *buf @@ -1540,7 +1669,7 @@ buffer_linelim (struct buffer const *buf)
by KEY in LINE. */ by KEY in LINE. */
static char * static char *
@ -2644,7 +2659,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
char *ptr = line->text, *lim = ptr + line->length - 1; char *ptr = line->text, *lim = ptr + line->length - 1;
size_t sword = key->sword; size_t sword = key->sword;
@@ -1549,10 +1676,10 @@ begfield (struct line const *line, struc @@ -1549,10 +1678,10 @@ begfield (struct line const *line, struct keyfield const *key)
/* The leading field separator itself is included in a field when -t /* The leading field separator itself is included in a field when -t
is absent. */ is absent. */
@ -2657,7 +2672,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
++ptr; ++ptr;
if (ptr < lim) if (ptr < lim)
++ptr; ++ptr;
@@ -1578,11 +1705,70 @@ begfield (struct line const *line, struc @@ -1578,11 +1707,70 @@ begfield (struct line const *line, struct keyfield const *key)
return ptr; return ptr;
} }
@ -2729,7 +2744,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
char *ptr = line->text, *lim = ptr + line->length - 1; char *ptr = line->text, *lim = ptr + line->length - 1;
size_t eword = key->eword, echar = key->echar; size_t eword = key->eword, echar = key->echar;
@@ -1597,10 +1783,10 @@ limfield (struct line const *line, struc @@ -1597,10 +1785,10 @@ limfield (struct line const *line, struct keyfield const *key)
`beginning' is the first character following the delimiting TAB. `beginning' is the first character following the delimiting TAB.
Otherwise, leave PTR pointing at the first `blank' character after Otherwise, leave PTR pointing at the first `blank' character after
the preceding field. */ the preceding field. */
@ -2742,7 +2757,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
++ptr; ++ptr;
if (ptr < lim && (eword || echar)) if (ptr < lim && (eword || echar))
++ptr; ++ptr;
@@ -1646,10 +1832,10 @@ limfield (struct line const *line, struc @@ -1646,10 +1834,10 @@ limfield (struct line const *line, struct keyfield const *key)
*/ */
/* Make LIM point to the end of (one byte past) the current field. */ /* Make LIM point to the end of (one byte past) the current field. */
@ -2755,7 +2770,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
if (newlim) if (newlim)
lim = newlim; lim = newlim;
} }
@@ -1680,6 +1866,113 @@ limfield (struct line const *line, struc @@ -1680,6 +1868,130 @@ limfield (struct line const *line, struct keyfield const *key)
return ptr; return ptr;
} }
@ -2865,11 +2880,28 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ return ptr; + return ptr;
+} +}
+#endif +#endif
+
+static void
+skipblanks_uni (const char **ptr, const char *lim)
+{
+ while (*ptr < lim && blanks[to_uchar (**ptr)])
+ ++(*ptr);
+}
+
+#if HAVE_MBRTOWC
+static void
+skipblanks_mb (const char **ptr, const char *lim)
+{
+ size_t mblength;
+ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
+ (*ptr) += mblength;
+}
+#endif
+ +
/* Fill BUF reading from FP, moving buf->left bytes from the end /* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Set up BUF's line file wasn't terminated by a newline, supply one. Set up BUF's line
@@ -1766,8 +2059,24 @@ fillbuf (struct buffer *buf, FILE *fp, c @@ -1766,8 +2078,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
else else
{ {
if (key->skipsblanks) if (key->skipsblanks)
@ -2880,8 +2912,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ if (MB_CUR_MAX > 1) + if (MB_CUR_MAX > 1)
+ { + {
+ size_t mblength; + size_t mblength;
+ mbstate_t state;
+ memset (&state, '\0', sizeof(mbstate_t));
+ while (line_start < line->keylim && + while (line_start < line->keylim &&
+ ismbblank (line_start, + ismbblank (line_start,
+ line->keylim - line_start, + line->keylim - line_start,
@ -2896,7 +2926,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
line->keybeg = line_start; line->keybeg = line_start;
} }
} }
@@ -1888,7 +2197,7 @@ human_numcompare (char const *a, char co @@ -1888,7 +2214,7 @@ human_numcompare (char const *a, char const *b)
hideously fast. */ hideously fast. */
static int static int
@ -2905,7 +2935,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
while (blanks[to_uchar (*a)]) while (blanks[to_uchar (*a)])
a++; a++;
@@ -1898,6 +2207,25 @@ numcompare (char const *a, char const *b @@ -1898,6 +2224,25 @@ numcompare (char const *a, char const *b)
return strnumcmp (a, b, decimal_point, thousands_sep); return strnumcmp (a, b, decimal_point, thousands_sep);
} }
@ -2931,7 +2961,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
static int static int
general_numcompare (char const *sa, char const *sb) general_numcompare (char const *sa, char const *sb)
{ {
@@ -1930,7 +2258,7 @@ general_numcompare (char const *sa, char @@ -1930,7 +2275,7 @@ general_numcompare (char const *sa, char const *sb)
Return 0 if the name in S is not recognized. */ Return 0 if the name in S is not recognized. */
static int static int
@ -2940,7 +2970,14 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
size_t lo = 0; size_t lo = 0;
size_t hi = MONTHS_PER_YEAR; size_t hi = MONTHS_PER_YEAR;
@@ -2210,7 +2538,7 @@ debug_key (struct line const *line, stru @@ -2204,13 +2549,12 @@ debug_key (struct line const *line, struct keyfield const *key)
{
char saved = *lim; *lim = '\0';
- while (blanks[to_uchar (*beg)])
- beg++;
+ skipblanks (&beg, lim);
char *tighter_lim = beg; char *tighter_lim = beg;
if (key->month) if (key->month)
@ -2949,7 +2986,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else if (key->general_numeric) else if (key->general_numeric)
ignore_value (strtold (beg, &tighter_lim)); ignore_value (strtold (beg, &tighter_lim));
else if (key->numeric || key->human_numeric) else if (key->numeric || key->human_numeric)
@@ -2354,7 +2682,7 @@ key_warnings (struct keyfield const *gke @@ -2354,7 +2698,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key) bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
&& !(key->schar || key->echar); && !(key->schar || key->echar);
bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
@ -2958,7 +2995,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
&& ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned)) && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
|| (!key->skipsblanks && key->schar) || (!key->skipsblanks && key->schar)
|| (!key->skipeblanks && key->echar))) || (!key->skipeblanks && key->echar)))
@@ -2412,11 +2740,83 @@ key_warnings (struct keyfield const *gke @@ -2412,11 +2756,83 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
error (0, 0, _("option `-r' only applies to last-resort comparison")); error (0, 0, _("option `-r' only applies to last-resort comparison"));
} }
@ -3043,7 +3080,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{ {
struct keyfield *key = keylist; struct keyfield *key = keylist;
@@ -2501,7 +2898,7 @@ keycompare (struct line const *a, struct @@ -2501,7 +2917,7 @@ keycompare (struct line const *a, struct line const *b)
else if (key->human_numeric) else if (key->human_numeric)
diff = human_numcompare (ta, tb); diff = human_numcompare (ta, tb);
else if (key->month) else if (key->month)
@ -3052,7 +3089,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else if (key->random) else if (key->random)
diff = compare_random (ta, tlena, tb, tlenb); diff = compare_random (ta, tlena, tb, tlenb);
else if (key->version) else if (key->version)
@@ -2617,6 +3014,179 @@ keycompare (struct line const *a, struct @@ -2617,6 +3033,179 @@ keycompare (struct line const *a, struct line const *b)
return key->reverse ? -diff : diff; return key->reverse ? -diff : diff;
} }
@ -3232,7 +3269,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Compare two lines A and B, returning negative, zero, or positive /* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */ depending on whether A compares less than, equal to, or greater than B. */
@@ -4006,7 +4576,7 @@ main (int argc, char **argv) @@ -4006,7 +4595,7 @@ main (int argc, char **argv)
initialize_exit_failure (SORT_FAILURE); initialize_exit_failure (SORT_FAILURE);
hard_LC_COLLATE = hard_locale (LC_COLLATE); hard_LC_COLLATE = hard_locale (LC_COLLATE);
@ -3241,7 +3278,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
hard_LC_TIME = hard_locale (LC_TIME); hard_LC_TIME = hard_locale (LC_TIME);
#endif #endif
@@ -4027,6 +4597,27 @@ main (int argc, char **argv) @@ -4027,6 +4616,29 @@ main (int argc, char **argv)
thousands_sep = -1; thousands_sep = -1;
} }
@ -3251,6 +3288,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ inittables = inittables_mb; + inittables = inittables_mb;
+ begfield = begfield_mb; + begfield = begfield_mb;
+ limfield = limfield_mb; + limfield = limfield_mb;
+ skipblanks = skipblanks_mb;
+ getmonth = getmonth_mb; + getmonth = getmonth_mb;
+ keycompare = keycompare_mb; + keycompare = keycompare_mb;
+ numcompare = numcompare_mb; + numcompare = numcompare_mb;
@ -3261,6 +3299,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ inittables = inittables_uni; + inittables = inittables_uni;
+ begfield = begfield_uni; + begfield = begfield_uni;
+ limfield = limfield_uni; + limfield = limfield_uni;
+ skipblanks = skipblanks_uni;
+ getmonth = getmonth_uni; + getmonth = getmonth_uni;
+ keycompare = keycompare_uni; + keycompare = keycompare_uni;
+ numcompare = numcompare_uni; + numcompare = numcompare_uni;
@ -3269,7 +3308,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
have_read_stdin = false; have_read_stdin = false;
inittables (); inittables ();
@@ -4297,13 +4888,35 @@ main (int argc, char **argv) @@ -4297,13 +4909,34 @@ main (int argc, char **argv)
case 't': case 't':
{ {
@ -3286,7 +3325,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ { + {
+ wchar_t wc; + wchar_t wc;
+ mbstate_t state; + mbstate_t state;
+ size_t i;
+ +
+ memset (&state, '\0', sizeof (mbstate_t)); + memset (&state, '\0', sizeof (mbstate_t));
+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
@ -3309,7 +3347,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else else
{ {
/* Provoke with `sort -txx'. Complain about /* Provoke with `sort -txx'. Complain about
@@ -4314,9 +4927,12 @@ main (int argc, char **argv) @@ -4314,9 +4947,12 @@ main (int argc, char **argv)
quote (optarg)); quote (optarg));
} }
} }
@ -4086,39 +4124,3 @@ diff -urNp coreutils-8.6-orig/tests/misc/sort-mb-tests coreutils-8.6/tests/misc/
+fi +fi
+test $errors = 0 || errors=1 +test $errors = 0 || errors=1
+exit $errors +exit $errors
diff -urNp coreutils-8.6-orig/tests/misc/sort-debug-keys coreutils-8.6/tests/misc/sort-debug-keys
--- coreutils-8.6-orig/tests/misc/sort-debug-keys 2010-10-11 19:35:11.000000000 +0200
+++ coreutils-8.6/tests/misc/sort-debug-keys 2010-10-19 14:55:55.435692063 +0200
@@ -305,18 +305,19 @@ _____
___________________
EOF
-: ${LOCALE_FR_UTF8=none}
-if test "$LOCALE_FR_UTF8" != "none"; then
- (
- echo ' 1²---++3 1,234 Mi' |
- LC_ALL=C sort --debug -k2g -k1b,1
- echo ' 1²---++3 1,234 Mi' |
- LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
- echo '+1234 1234Gi 1,234M' |
- LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
- -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
- ) > out
- compare out exp || fail=1
-fi
+#temporarily disable sort debug-keys test for mbyte locales (doesn't work atm.)
+#: ${LOCALE_FR_UTF8=none}
+#if test "$LOCALE_FR_UTF8" != "none"; then
+# (
+# echo ' 1²---++3 1,234 Mi' |
+# LC_ALL=C sort --debug -k2g -k1b,1
+# echo ' 1²---++3 1,234 Mi' |
+# LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
+# echo '+1234 1234Gi 1,234M' |
+# LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
+# -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
+# ) > out
+# compare out exp || fail=1
+#fi
Exit $fail

View File

@ -1,7 +1,7 @@
Summary: A set of basic GNU tools commonly used in shell scripts Summary: A set of basic GNU tools commonly used in shell scripts
Name: coreutils Name: coreutils
Version: 8.6 Version: 8.6
Release: 1%{?dist} Release: 2%{?dist}
License: GPLv3+ License: GPLv3+
Group: System Environment/Base Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/ Url: http://www.gnu.org/software/coreutils/
@ -336,6 +336,9 @@ fi
%{_libdir}/coreutils %{_libdir}/coreutils
%changelog %changelog
* Tue Oct 26 2010 Kamil Dudka <kdudka@redhat.com> - 8.6-2
- improve i18n support in sort (debug-keys test is now back)
* Wed Oct 20 2010 Ondrej Vasik <ovasik@redhat.com> - 8.6-1 * Wed Oct 20 2010 Ondrej Vasik <ovasik@redhat.com> - 8.6-1
- new upstream release 8.6 - new upstream release 8.6
- remove applied patches, temporarily disable sort - remove applied patches, temporarily disable sort