- Bypass kwset matching when ignoring case and processing multibyte input

(bug #143079).
This commit is contained in:
Tim Waugh 2004-12-16 17:50:47 +00:00
parent 8443e177c3
commit a5ca95c10c
2 changed files with 23 additions and 14 deletions

View File

@ -1,5 +1,5 @@
--- grep-2.5.1/src/search.c 2004-12-14 15:55:21.257729918 +0000 --- grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000
+++ grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000 +++ grep-2.5.1/src/search.c 2004-12-16 17:46:57.039678304 +0000
@@ -39,6 +39,9 @@ @@ -39,6 +39,9 @@
#ifdef HAVE_LIBPCRE #ifdef HAVE_LIBPCRE
# include <pcre.h> # include <pcre.h>
@ -125,7 +125,7 @@
#endif /* MBS_SUPPORT */ #endif /* MBS_SUPPORT */
buflim = buf + size; buflim = buf + size;
@@ -373,18 +337,56 @@ @@ -373,21 +337,60 @@
if (kwset) if (kwset)
{ {
/* Find a possible match using the KWset matcher. */ /* Find a possible match using the KWset matcher. */
@ -134,8 +134,8 @@
+ size_t bytes_left = 0; + size_t bytes_left = 0;
+#endif /* MBS_SUPPORT */ +#endif /* MBS_SUPPORT */
+ size_t offset; + size_t offset;
+ /* kwsexec doesn't work with match_icase and multibyte input. */
+#ifdef MBS_SUPPORT +#ifdef MBS_SUPPORT
+ /* kwsexec doesn't work with match_icase and multibyte input. */
+ if (match_icase && MB_CUR_MAX > 1) + if (match_icase && MB_CUR_MAX > 1)
+ /* Avoid kwset */ + /* Avoid kwset */
+ offset = 0; + offset = 0;
@ -184,8 +184,13 @@
+#endif /* MBS_SUPPORT */ +#endif /* MBS_SUPPORT */
while (beg > buf && beg[-1] != eol) while (beg > buf && beg[-1] != eol)
--beg; --beg;
if (kwsm.index < kwset_exact_matches) - if (kwsm.index < kwset_exact_matches)
@@ -395,13 +397,47 @@ + if (!(match_icase && MB_CUR_MAX > 1) &&
+ (kwsm.index < kwset_exact_matches))
goto success_in_beg_and_end;
if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
continue;
@@ -395,13 +398,47 @@
else else
{ {
/* No good fixed strings; start with DFA. */ /* No good fixed strings; start with DFA. */
@ -233,7 +238,7 @@
while (beg > buf && beg[-1] != eol) while (beg > buf && beg[-1] != eol)
--beg; --beg;
} }
@@ -469,15 +505,6 @@ @@ -469,15 +506,6 @@
} /* for (beg = end ..) */ } /* for (beg = end ..) */
failure: failure:
@ -249,7 +254,7 @@
return (size_t) -1; return (size_t) -1;
success_in_beg_and_end: success_in_beg_and_end:
@@ -486,15 +513,6 @@ @@ -486,15 +514,6 @@
/* FALLTHROUGH */ /* FALLTHROUGH */
success_in_start_and_len: success_in_start_and_len:
@ -265,7 +270,7 @@
*match_size = len; *match_size = len;
return start; return start;
} }
@@ -504,6 +522,7 @@ @@ -504,6 +523,7 @@
{ {
char const *beg, *lim, *err; char const *beg, *lim, *err;
@ -273,7 +278,7 @@
kwsinit (); kwsinit ();
beg = pattern; beg = pattern;
do do
@@ -531,17 +550,8 @@ @@ -531,17 +551,8 @@
struct kwsmatch kwsmatch; struct kwsmatch kwsmatch;
size_t ret_val; size_t ret_val;
#ifdef MBS_SUPPORT #ifdef MBS_SUPPORT
@ -293,7 +298,7 @@
#endif /* MBS_SUPPORT */ #endif /* MBS_SUPPORT */
for (beg = buf; beg <= buf + size; ++beg) for (beg = buf; beg <= buf + size; ++beg)
@@ -550,8 +560,33 @@ @@ -550,8 +561,33 @@
if (offset == (size_t) -1) if (offset == (size_t) -1)
goto failure; goto failure;
#ifdef MBS_SUPPORT #ifdef MBS_SUPPORT
@ -329,7 +334,7 @@
#endif /* MBS_SUPPORT */ #endif /* MBS_SUPPORT */
beg += offset; beg += offset;
len = kwsmatch.size[0]; len = kwsmatch.size[0];
@@ -587,6 +622,36 @@ @@ -587,6 +623,36 @@
if (offset == -1) { if (offset == -1) {
break; /* Try a different anchor. */ break; /* Try a different anchor. */
} }
@ -366,7 +371,7 @@
beg += offset; beg += offset;
len = kwsmatch.size[0]; len = kwsmatch.size[0];
} }
@@ -597,19 +662,31 @@ @@ -597,19 +663,31 @@
} }
failure: failure:
@ -406,7 +411,7 @@
end++; end++;
while (buf < beg && beg[-1] != eol) while (buf < beg && beg[-1] != eol)
--beg; --beg;
@@ -618,15 +695,6 @@ @@ -618,15 +696,6 @@
success_in_beg_and_len: success_in_beg_and_len:
*match_size = len; *match_size = len;

View File

@ -85,6 +85,10 @@ fi
%{_mandir}/*/* %{_mandir}/*/*
%changelog %changelog
* Thu Dec 16 2004 Tim Waugh <twaugh@redhat.com>
- Bypass kwset matching when ignoring case and processing multibyte input
(bug #143079).
* Tue Dec 14 2004 Tim Waugh <twaugh@redhat.com> 2.5.1-42 * Tue Dec 14 2004 Tim Waugh <twaugh@redhat.com> 2.5.1-42
- Further UTF-8 processing avoided since a '\n' byte is always an - Further UTF-8 processing avoided since a '\n' byte is always an
end-of-line character in that encoding. end-of-line character in that encoding.