- Bypass kwset matching when ignoring case and processing multibyte input (bug #143079).
This commit is contained in:
Tim Waugh 2004-12-16 17:50:47 +00:00
parent 8443e177c3
commit a5ca95c10c
2 changed files with 23 additions and 14 deletions

View File

@ -1,5 +1,5 @@
--- grep-2.5.1/src/search.c 2004-12-14 15:55:21.257729918 +0000
+++ grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000
--- grep-2.5.1/src/search.c 2004-12-16 16:53:01.110921088 +0000
+++ grep-2.5.1/src/search.c 2004-12-16 17:46:57.039678304 +0000
@@ -39,6 +39,9 @@
#ifdef HAVE_LIBPCRE
# include <pcre.h>
@ -125,7 +125,7 @@
#endif /* MBS_SUPPORT */
buflim = buf + size;
@@ -373,18 +337,56 @@
@@ -373,21 +337,60 @@
if (kwset)
{
/* Find a possible match using the KWset matcher. */
@ -134,8 +134,8 @@
+ size_t bytes_left = 0;
+#endif /* MBS_SUPPORT */
+ size_t offset;
+ /* kwsexec doesn't work with match_icase and multibyte input. */
+#ifdef MBS_SUPPORT
+ /* kwsexec doesn't work with match_icase and multibyte input. */
+ if (match_icase && MB_CUR_MAX > 1)
+ /* Avoid kwset */
+ offset = 0;
@ -184,8 +184,13 @@
+#endif /* MBS_SUPPORT */
while (beg > buf && beg[-1] != eol)
--beg;
if (kwsm.index < kwset_exact_matches)
@@ -395,13 +397,47 @@
- if (kwsm.index < kwset_exact_matches)
+ if (!(match_icase && MB_CUR_MAX > 1) &&
+ (kwsm.index < kwset_exact_matches))
goto success_in_beg_and_end;
if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
continue;
@@ -395,13 +398,47 @@
else
{
/* No good fixed strings; start with DFA. */
@ -233,7 +238,7 @@
while (beg > buf && beg[-1] != eol)
--beg;
}
@@ -469,15 +505,6 @@
@@ -469,15 +506,6 @@
} /* for (beg = end ..) */
failure:
@ -249,7 +254,7 @@
return (size_t) -1;
success_in_beg_and_end:
@@ -486,15 +513,6 @@
@@ -486,15 +514,6 @@
/* FALLTHROUGH */
success_in_start_and_len:
@ -265,7 +270,7 @@
*match_size = len;
return start;
}
@@ -504,6 +522,7 @@
@@ -504,6 +523,7 @@
{
char const *beg, *lim, *err;
@ -273,7 +278,7 @@
kwsinit ();
beg = pattern;
do
@@ -531,17 +550,8 @@
@@ -531,17 +551,8 @@
struct kwsmatch kwsmatch;
size_t ret_val;
#ifdef MBS_SUPPORT
@ -293,7 +298,7 @@
#endif /* MBS_SUPPORT */
for (beg = buf; beg <= buf + size; ++beg)
@@ -550,8 +560,33 @@
@@ -550,8 +561,33 @@
if (offset == (size_t) -1)
goto failure;
#ifdef MBS_SUPPORT
@ -329,7 +334,7 @@
#endif /* MBS_SUPPORT */
beg += offset;
len = kwsmatch.size[0];
@@ -587,6 +622,36 @@
@@ -587,6 +623,36 @@
if (offset == -1) {
break; /* Try a different anchor. */
}
@ -366,7 +371,7 @@
beg += offset;
len = kwsmatch.size[0];
}
@@ -597,19 +662,31 @@
@@ -597,19 +663,31 @@
}
failure:
@ -406,7 +411,7 @@
end++;
while (buf < beg && beg[-1] != eol)
--beg;
@@ -618,15 +695,6 @@
@@ -618,15 +696,6 @@
success_in_beg_and_len:
*match_size = len;

View File

@ -85,6 +85,10 @@ fi
%{_mandir}/*/*
%changelog
* Thu Dec 16 2004 Tim Waugh <twaugh@redhat.com>
- Bypass kwset matching when ignoring case and processing multibyte input
(bug #143079).
* Tue Dec 14 2004 Tim Waugh <twaugh@redhat.com> 2.5.1-42
- Further UTF-8 processing avoided since a '\n' byte is always an
end-of-line character in that encoding.