diff --git a/coreutils-8.5-trcaseconversion.patch b/coreutils-8.5-trcaseconversion.patch new file mode 100644 index 0000000..6cd66ba --- /dev/null +++ b/coreutils-8.5-trcaseconversion.patch @@ -0,0 +1,431 @@ +From: Pádraig Brady
+Date: Mon, 27 Sep 2010 06:16:44 +0000 (+0100)
+Subject: tr: fix various issues with case conversion
+X-Git-Url: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=commitdiff_plain;h=3f48829c;hp=704eedab034e24814067c535d3577f165c9a8b68
+
+tr: fix various issues with case conversion
+
+This valid translation spec aborted:
+ LC_ALL=en_US.iso-8859-1 tr '[:upper:]- ' '[:lower:]_'
+This invalid translation spec aborted:
+ LC_ALL=en_US.iso-8859-1 tr '[:upper:] ' '[:lower:]'
+This was caused by commit 6efd1046, 05-01-2008,
+"Avoid tr case-conversion failure in some locales"
+
+This misaligned conversion spec was allowed:
+ LC_ALL=C tr 'A-Y[:lower:]' 'a-z[:upper:]'
+This was caused by commit af5d0c36, 21-10-2007,
+"tr: do not reject an unmatched [:lower:] or [:upper:] in SET1"
+
+This misaligned spec was allowed by extending the class:
+ LC_ALL=C tr '[:upper:] ' '[:lower:]'
+
+* src/tr.c (validate_case_classes): A new function to check
+alignment of case conversion classes. Also it adjusts the
+length of the sets so that locales with different numbers of
+upper and lower case characters, don't cause issues.
+(string2_extend): Disallow extending the case conversion
+class as in the above example. That is locale dependent
+and most likely not what the user wants.
+(validate): Do the simple test for "restricted" char classes
+earlier, so we don't redundantly do more expensive validation.
+(main): Remove the case class validation, and simplify.
+* tests/misc/tr-case-class: A new test to test the various
+alignment and locale issues, associated with case conversion.
+* tests/misc/tr: Move case conversion tests to new tr-case-class.
+* tests/Makefile.am: Reference the new test.
+---
+
+diff --git a/src/tr.c b/src/tr.c
+index a5b6810..479d3d3 100644
+--- a/src/tr.c
++++ b/src/tr.c
+@@ -1177,6 +1177,78 @@ card_of_complement (struct Spec_list *s)
+ return cardinality;
+ }
+
++/* Discard the lengths associated with a case conversion,
++ as using the actual number of upper or lower case characters
++ is problematic when they don't match in some locales.
++ Also ensure the case conversion classes in string2 are
++ aligned correctly with those in string1.
++ Note POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
++ is undefined. Therefore we allow it (unlike Solaris)
++ and treat it as a no-op. */
++
++static void
++validate_case_classes (struct Spec_list *s1, struct Spec_list *s2)
++{
++ size_t n_upper = 0;
++ size_t n_lower = 0;
++ unsigned int i;
++ int c1 = 0;
++ int c2 = 0;
++ count old_s1_len = s1->length;
++ count old_s2_len = s2->length;
++ struct List_element *s1_tail = s1->tail;
++ struct List_element *s2_tail = s2->tail;
++ bool s1_new_element = true;
++ bool s2_new_element = true;
++
++ if (!s2->has_char_class)
++ return;
++
++ for (i = 0; i < N_CHARS; i++)
++ {
++ if (isupper (i))
++ n_upper++;
++ if (islower (i))
++ n_lower++;
++ }
++
++ s1->state = BEGIN_STATE;
++ s2->state = BEGIN_STATE;
++
++ while (c1 != -1 && c2 != -1)
++ {
++ enum Upper_Lower_class class_s1, class_s2;
++
++ c1 = get_next (s1, &class_s1);
++ c2 = get_next (s2, &class_s2);
++
++ /* If c2 transitions to a new case class, then
++ c1 must also transition at the same time. */
++ if (s2_new_element && class_s2 != UL_NONE
++ && !(s1_new_element && class_s1 != UL_NONE))
++ error (EXIT_FAILURE, 0,
++ _("misaligned [:upper:] and/or [:lower:] construct"));
++
++ /* If case converting, quickly skip over the elements. */
++ if (class_s2 != UL_NONE)
++ {
++ skip_construct (s1);
++ skip_construct (s2);
++ /* Discount insignificant/problematic lengths. */
++ s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1;
++ s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1;
++ }
++
++ s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new. */
++ s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new. */
++ }
++
++ assert (old_s1_len >= s1->length && old_s2_len >= s2->length);
++
++ s1->tail = s1_tail;
++ s2->tail = s2_tail;
++}
++
+ /* Gather statistics about the spec-list S in preparation for the tests
+ in validate that determine the consistency of the specs. This function
+ is called at most twice; once for string1, and again for any string2.
+@@ -1318,20 +1390,14 @@ parse_str (char const *s, struct Spec_list *spec_list)
+ Upon successful completion, S2->length is set to S1->length. The only
+ way this function can fail to make S2 as long as S1 is when S2 has
+ zero-length, since in that case, there is no last character to repeat.
+- So S2->length is required to be at least 1.
++ So S2->length is required to be at least 1. */
+
+- Providing this functionality allows the user to do some pretty
+- non-BSD (and non-portable) things: For example, the command
+- tr -cs '[:upper:]0-9' '[:lower:]'
+- is almost guaranteed to give results that depend on your collating
+- sequence. */
+
+ static void
+ string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
+ {
+ struct List_element *p;
+ unsigned char char_to_repeat;
+- int i;
+
+ assert (translating);
+ assert (s1->length > s2->length);
+@@ -1347,11 +1413,13 @@ string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
+ char_to_repeat = p->u.range.last_char;
+ break;
+ case RE_CHAR_CLASS:
+- for (i = N_CHARS - 1; i >= 0; i--)
+- if (is_char_class_member (p->u.char_class, i))
+- break;
+- assert (i >= 0);
+- char_to_repeat = i;
++ /* Note BSD allows extending of classes in string2. For example:
++ tr '[:upper:]0-9' '[:lower:]'
++ That's not portable however, contradicts POSIX and is dependent
++ on your collating sequence. */
++ error (EXIT_FAILURE, 0,
++ _("when translating with string1 longer than string2,\n\
++the latter string must not end with a character class"));
+ break;
+
+ case RE_REPEATED_CHAR:
+@@ -1431,6 +1499,15 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
+ when translating"));
+ }
+
++ if (s2->has_restricted_char_class)
++ {
++ error (EXIT_FAILURE, 0,
++ _("when translating, the only character classes that may \
++appear in\nstring2 are `upper' and `lower'"));
++ }
++
++ validate_case_classes (s1, s2);
++
+ if (s1->length > s2->length)
+ {
+ if (!truncate_set1)
+@@ -1452,13 +1529,6 @@ when translating"));
+ _("when translating with complemented character classes,\
+ \nstring2 must map all characters in the domain to one"));
+ }
+-
+- if (s2->has_restricted_char_class)
+- {
+- error (EXIT_FAILURE, 0,
+- _("when translating, the only character classes that may \
+-appear in\nstring2 are `upper' and `lower'"));
+- }
+ }
+ else
+ /* Not translating. */
+@@ -1812,7 +1882,6 @@ main (int argc, char **argv)
+ {
+ int c1, c2;
+ int i;
+- bool case_convert = false;
+ enum Upper_Lower_class class_s1;
+ enum Upper_Lower_class class_s2;
+
+@@ -1822,47 +1891,21 @@ main (int argc, char **argv)
+ s2->state = BEGIN_STATE;
+ while (true)
+ {
+- /* When the previous pair identified case-converting classes,
+- advance S1 and S2 so that each points to the following
+- construct. */
+- if (case_convert)
+- {
+- skip_construct (s1);
+- skip_construct (s2);
+- case_convert = false;
+- }
+-
+ c1 = get_next (s1, &class_s1);
+ c2 = get_next (s2, &class_s2);
+
+- /* When translating and there is an [:upper:] or [:lower:]
+- class in SET2, then there must be a corresponding [:lower:]
+- or [:upper:] class in SET1. */
+- if (class_s1 == UL_NONE
+- && (class_s2 == UL_LOWER || class_s2 == UL_UPPER))
+- error (EXIT_FAILURE, 0,
+- _("misaligned [:upper:] and/or [:lower:] construct"));
+-
+ if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
+ {
+- case_convert = true;
+ for (i = 0; i < N_CHARS; i++)
+ if (islower (i))
+ xlate[i] = toupper (i);
+ }
+ else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
+ {
+- case_convert = true;
+ for (i = 0; i < N_CHARS; i++)
+ if (isupper (i))
+ xlate[i] = tolower (i);
+ }
+- else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
+- || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
+- {
+- /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
+- is undefined. Treat it as a no-op. */
+- }
+ else
+ {
+ /* The following should have been checked by validate... */
+@@ -1870,6 +1913,13 @@ main (int argc, char **argv)
+ break;
+ xlate[c1] = c2;
+ }
++
++ /* When case-converting, skip the elements as an optimization. */
++ if (class_s2 != UL_NONE)
++ {
++ skip_construct (s1);
++ skip_construct (s2);
++ }
+ }
+ assert (c1 == -1 || truncate_set1);
+ }
+diff --git a/tests/Makefile.am b/tests/Makefile.am
+index 5619d0b..3236637 100644
+--- a/tests/Makefile.am
++++ b/tests/Makefile.am
+@@ -260,6 +260,7 @@ TESTS = \
+ misc/timeout \
+ misc/timeout-parameters \
+ misc/tr \
++ misc/tr-case-class \
+ misc/truncate-dangling-symlink \
+ misc/truncate-dir-fail \
+ misc/truncate-fail-diag \
+diff --git a/tests/misc/tr b/tests/misc/tr
+index ca7a960..00cd8e6 100755
+--- a/tests/misc/tr
++++ b/tests/misc/tr
+@@ -155,34 +155,8 @@ my @Tests =
+
+ # Up to coreutils-6.9, this would provoke a failed assertion.
+ ['no-abort-1', qw(-c a '[b*256]'), {IN=>'abc'}, {OUT=>'abb'}],
+-
+- # Up to coreutils-6.9, tr rejected an unmatched [:lower:] or [:upper:] in SET1.
+- ['s1-lower', qw('[:lower:]' '[.*]'),
+- {IN=>'#$%123abcABC'}, {OUT=>'#$%123...ABC'}],
+- ['s1-upper', qw('[:upper:]' '[.*]'),
+- {IN=>'#$%123abcABC'}, {OUT=>'#$%123abc...'}],
+-
+- # Up to coreutils-6.9.91, this would fail with the diagnostic:
+- # tr: misaligned [:upper:] and/or [:lower:] construct
+- # with LC_CTYPE=en_US.ISO-8859-1.
+- ['tolower-F', qw('[:upper:]' '[:lower:]'), {IN=>'A'}, {OUT=>'a'}],
+-
+- # When doing a case-converting translation with something after the
+- # [:upper:] and [:lower:] elements, ensure that tr honors the following byte.
+- ['upcase-xtra', qw('[:lower:].' '[:upper:]x'), {IN=>'abc.'}, {OUT=>'ABCx'}],
+- ['dncase-xtra', qw('[:upper:].' '[:lower:]x'), {IN=>'ABC.'}, {OUT=>'abcx'}],
+ );
+
+-# Set LC_CTYPE=en_US.ISO-8859-1 in the environment of the tolower-F test.
+-foreach my $t (@Tests)
+- {
+- if ($t->[0] eq 'tolower-F')
+- {
+- push @$t, {ENV=>'LC_CTYPE=en_US.ISO-8859-1'};
+- last;
+- }
+- }
+-
+ @Tests = triple_test \@Tests;
+
+ # tr takes its input only from stdin, not from a file argument, so
+diff --git a/tests/misc/tr-case-class b/tests/misc/tr-case-class
+new file mode 100755
+index 0000000..d81c676
+--- /dev/null
++++ b/tests/misc/tr-case-class
+@@ -0,0 +1,112 @@
++#!/bin/sh
++# Test case conversion classes
++
++# Copyright (C) 2010 Free Software Foundation, Inc.
++
++# This program is free software: you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation, either version 3 of the License, or
++# (at your option) any later version.
++
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++
++# You should have received a copy of the GNU General Public License
++# along with this program. If not, see