65 lines
2.4 KiB
Diff
65 lines
2.4 KiB
Diff
|
commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
|
||
|
Author: Paolo Bonzini <bonzini@gnu.org>
|
||
|
Date: Tue Sep 21 17:00:55 2010 +0200
|
||
|
|
||
|
dfa: process range expressions consistently with system regex
|
||
|
|
||
|
The actual meaning of range expressions in glibc is not exactly that of strcoll,
|
||
|
which makes the behavior of grep hard to predict when compiled with the
|
||
|
system regex. Leave to the system regex matcher the decision of which
|
||
|
single-byte characters are matched by a range expression.
|
||
|
|
||
|
This partially reverts a change made in commit 0d38a8bb (which made
|
||
|
sense at the time, but not now that src/dfa.c is not doing multibyte
|
||
|
character set matching anymore).
|
||
|
|
||
|
* src/dfa.c (in_coll_range): Remove.
|
||
|
(parse_bracket_exp): Use system regex to find which single-byte
|
||
|
characters match a range expression.
|
||
|
|
||
|
diff --git a/src/dfa.c b/src/dfa.c
|
||
|
index a2f4174..f3e066f 100644
|
||
|
--- a/src/dfa.c
|
||
|
+++ b/src/dfa.c
|
||
|
@@ -697,13 +697,6 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
|
||
|
|
||
|
#endif /* MBS_SUPPORT */
|
||
|
|
||
|
-static int
|
||
|
-in_coll_range (char ch, char from, char to)
|
||
|
-{
|
||
|
- char c[6] = { from, 0, ch, 0, to, 0 };
|
||
|
- return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
|
||
|
-}
|
||
|
-
|
||
|
typedef int predicate (int);
|
||
|
|
||
|
/* The following list maps the names of the Posix named character classes
|
||
|
@@ -979,10 +972,22 @@ parse_bracket_exp (void)
|
||
|
for (c = c1; c <= c2; c++)
|
||
|
setbit_case_fold (c, ccl);
|
||
|
else
|
||
|
- for (c = 0; c < NOTCHAR; ++c)
|
||
|
- if (!(case_fold && isupper (c))
|
||
|
- && in_coll_range (c, c1, c2))
|
||
|
- setbit_case_fold (c, ccl);
|
||
|
+ {
|
||
|
+ /* Defer to the system regex library about the meaning
|
||
|
+ of range expressions. */
|
||
|
+ regex_t re;
|
||
|
+ char pattern[6] = { '[', c1, '-', c2, ']', 0 };
|
||
|
+ char subject[2] = { 0, 0 };
|
||
|
+ regcomp (&re, pattern, REG_NOSUB);
|
||
|
+ for (c = 0; c < NOTCHAR; ++c)
|
||
|
+ {
|
||
|
+ subject[0] = c;
|
||
|
+ if (!(case_fold && isupper (c))
|
||
|
+ && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
|
||
|
+ setbit_case_fold (c, ccl);
|
||
|
+ }
|
||
|
+ regfree (&re);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
colon_warning_state |= 8;
|