fix sort bugs (including #485715) for multibyte locales

This commit is contained in:
Ondrej Vasik 2009-03-02 12:42:55 +00:00
parent c39fe6a1cc
commit bfff2133f5
3 changed files with 44 additions and 19 deletions

View File

@ -64,7 +64,18 @@ diff -urNp coreutils-7.1-orig/src/sort.c coreutils-7.1/src/sort.c
diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
--- coreutils-7.1-orig/tests/misc/sort 2009-01-27 22:11:25.000000000 +0100 --- coreutils-7.1-orig/tests/misc/sort 2009-01-27 22:11:25.000000000 +0100
+++ coreutils-7.1/tests/misc/sort 2009-02-25 16:21:48.000000000 +0100 +++ coreutils-7.1/tests/misc/sort 2009-02-25 16:21:48.000000000 +0100
@@ -110,6 +110,8 @@ my @Tests = @@ -24,6 +24,10 @@ my $prog = 'sort';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+my $mb_locale = $ENV{LOCALE_FR_UTF8};
+! defined $mb_locale || $mb_locale eq 'none'
+ and $mb_locale = 'C';
+
# Since each test is run with a file name and with redirected stdin,
# the name in the diagnostic is either the file name or "-".
# Normalize each diagnostic to use '-'.
@@ -110,6 +114,8 @@ my @Tests =
["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
@ -73,7 +84,7 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
# #
# report an error for `.' without following char spec # report an error for `.' without following char spec
["08a", '-k 2.,3', {EXIT=>2}, ["08a", '-k 2.,3', {EXIT=>2},
@@ -210,6 +212,10 @@ my @Tests = @@ -210,6 +216,15 @@ my @Tests =
# key start and key end. # key start and key end.
["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}], ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
@ -81,6 +92,11 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
+# next field are not included in the sort. I.E. order should not change here. +# next field are not included in the sort. I.E. order should not change here.
+["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}], +["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}],
+ +
+# When ignoring leading blanks for start position, ensure blanks from
+# next field are not included in the sort. I.E. order should not change here.
+# This was noticed as an issue on Fedora 8 (only in multibyte locales).
+["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
# This looks odd, but works properly -- 2nd keyspec is never # This looks odd, but works properly -- 2nd keyspec is never
# used because all lines are different. # used because all lines are different.
["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}], ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],

View File

@ -1938,7 +1938,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
if (newlim) if (newlim)
lim = newlim; lim = newlim;
} }
@@ -1384,6 +1570,107 @@ @@ -1384,6 +1570,113 @@
return ptr; return ptr;
} }
@ -1952,6 +1952,9 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
+ size_t mblength; + size_t mblength;
+ mbstate_t state; + mbstate_t state;
+ +
+ if (echar == 0)
+ eword++; /* skip all of end field. */
+
+ memset (&state, '\0', sizeof(mbstate_t)); + memset (&state, '\0', sizeof(mbstate_t));
+ +
+ if (tab_length) + if (tab_length)
@ -2020,6 +2023,8 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
+ } + }
+# endif +# endif
+ +
+ if (echar != 0)
+ {
+ /* If we're skipping leading blanks, don't start counting characters + /* If we're skipping leading blanks, don't start counting characters
+ * until after skipping past any leading blanks. */ + * until after skipping past any leading blanks. */
+ if (key->skipsblanks) + if (key->skipsblanks)
@ -2038,6 +2043,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c
+ else + else
+ ptr += mblength; + ptr += mblength;
+ } + }
+ }
+ +
+ return ptr; + return ptr;
+} +}

View File

@ -1,7 +1,7 @@
Summary: A set of basic GNU tools commonly used in shell scripts Summary: A set of basic GNU tools commonly used in shell scripts
Name: coreutils Name: coreutils
Version: 7.1 Version: 7.1
Release: 5%{?dist} Release: 6%{?dist}
License: GPLv3+ License: GPLv3+
Group: System Environment/Base Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/ Url: http://www.gnu.org/software/coreutils/
@ -313,6 +313,9 @@ fi
/sbin/runuser /sbin/runuser
%changelog %changelog
* Mon Mar 02 2009 Ondrej Vasik <ovasik@redhat.com> 7.1-6
- fix sort bugs (including #485715) for multibyte locales
as well
* Fri Feb 27 2009 Ondrej Vasik <ovasik@redhat.com> 7.1-5 * Fri Feb 27 2009 Ondrej Vasik <ovasik@redhat.com> 7.1-5
- fix infinite loop in recursive cp (upstream, introduced - fix infinite loop in recursive cp (upstream, introduced
by 7.1) by 7.1)