From bfff2133f5df6a92053347be42d78bdcae69327f Mon Sep 17 00:00:00 2001 From: Ondrej Vasik Date: Mon, 2 Mar 2009 12:42:55 +0000 Subject: [PATCH] fix sort bugs (including #485715) for multibyte locales --- coreutils-7.1-sort-endoffields.patch | 20 +++++++++++++-- coreutils-i18n.patch | 38 ++++++++++++++++------------ coreutils.spec | 5 +++- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/coreutils-7.1-sort-endoffields.patch b/coreutils-7.1-sort-endoffields.patch index 22e336f..190c4a1 100644 --- a/coreutils-7.1-sort-endoffields.patch +++ b/coreutils-7.1-sort-endoffields.patch @@ -64,7 +64,18 @@ diff -urNp coreutils-7.1-orig/src/sort.c coreutils-7.1/src/sort.c diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort --- coreutils-7.1-orig/tests/misc/sort 2009-01-27 22:11:25.000000000 +0100 +++ coreutils-7.1/tests/misc/sort 2009-02-25 16:21:48.000000000 +0100 -@@ -110,6 +110,8 @@ my @Tests = +@@ -24,6 +24,10 @@ my $prog = 'sort'; + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + # Since each test is run with a file name and with redirected stdin, + # the name in the diagnostic is either the file name or "-". + # Normalize each diagnostic to use '-'. +@@ -110,6 +114,8 @@ my @Tests = ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], @@ -73,7 +84,7 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort # # report an error for `.' without following char spec ["08a", '-k 2.,3', {EXIT=>2}, -@@ -210,6 +212,10 @@ my @Tests = +@@ -210,6 +216,15 @@ my @Tests = # key start and key end. ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}], @@ -81,6 +92,11 @@ diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort +# next field are not included in the sort. I.E. order should not change here. +["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}], + ++# When ignoring leading blanks for start position, ensure blanks from ++# next field are not included in the sort. I.E. order should not change here. ++# This was noticed as an issue on fedora 8 (only in multibyte locales). ++["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}, ++ {ENV => "LC_ALL=$mb_locale"}], # This looks odd, but works properly -- 2nd keyspec is never # used because all lines are different. ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}], diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index cd5852a..5e6facd 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1938,7 +1938,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c if (newlim) lim = newlim; } -@@ -1384,6 +1570,107 @@ +@@ -1384,6 +1570,113 @@ return ptr; } @@ -1952,6 +1952,9 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c + size_t mblength; + mbstate_t state; + ++ if (echar == 0) ++ eword++; /* skip all of end field. */ ++ + memset (&state, '\0', sizeof(mbstate_t)); + + if (tab_length) @@ -2020,24 +2023,27 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c + } +# endif + -+ /* If we're skipping leading blanks, don't start counting characters -+ * until after skipping past any leading blanks. */ -+ if (key->skipsblanks) -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; ++ if (echar != 0) ++ { ++ /* If we're skipping leading blanks, don't start counting characters ++ * until after skipping past any leading blanks. */ ++ if (key->skipsblanks) ++ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) ++ ptr += mblength; + -+ memset (&state, '\0', sizeof(mbstate_t)); ++ memset (&state, '\0', sizeof(mbstate_t)); + -+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ -+ for (i = 0; i < echar; i++) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); ++ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ ++ for (i = 0; i < echar; i++) ++ { ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); + -+ if (ptr + mblength > lim) -+ break; -+ else -+ ptr += mblength; -+ } ++ if (ptr + mblength > lim) ++ break; ++ else ++ ptr += mblength; ++ } ++ } + + return ptr; +} diff --git a/coreutils.spec b/coreutils.spec index bfa4a3a..24f093e 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 7.1 -Release: 5%{?dist} +Release: 6%{?dist} License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -313,6 +313,9 @@ fi /sbin/runuser %changelog +* Mon Mar 02 2009 Ondrej Vasik 7.1-6 +- fix sort bugs (including #485715) for multibyte locales + as well * Fri Feb 27 2009 Ondrej Vasik 7.1-5 - fix infinite loop in recursive cp (upstream, introduced by 7.1)