- Bypass kwset matching when ignoring case and processing multibyte input

(bug #143079).
2004-12-16 17:50:47 +00:00 · 2004-12-16 17:50:47 +00:00 · a5ca95c10c
commit a5ca95c10c
parent 8443e177c3
2 changed files with 23 additions and 14 deletions
--- a/grep-2.5.1-egf-speedup.patch
+++ b/grep-2.5.1-egf-speedup.patch
@ -1,5 +1,5 @@
--- grep-2.5.1/src/search.c	2004-12-14 15:55:21.257729918 +0000
+--- grep-2.5.1/src/search.c	2004-12-16 16:53:01.110921088 +0000
-+++ grep-2.5.1/src/search.c	2004-12-16 16:53:01.110921088 +0000
+++ grep-2.5.1/src/search.c	2004-12-16 17:46:57.039678304 +0000
@@ -39,6 +39,9 @@
 #ifdef HAVE_LIBPCRE
 # include <pcre.h>
@ -125,7 +125,7 @@
 #endif /* MBS_SUPPORT */
   buflim = buf + size;
-@@ -373,18 +337,56 @@
+@@ -373,21 +337,60 @@
 	  if (kwset)
 	    {
 	      /* Find a possible match using the KWset matcher. */
@ -134,8 +134,8 @@
 +	      size_t bytes_left = 0;
 +#endif /* MBS_SUPPORT */
 +	      size_t offset;
 +	      /* kwsexec doesn't work with match_icase and multibyte input. */
 +#ifdef MBS_SUPPORT
 +	      /* kwsexec doesn't work with match_icase and multibyte input. */
 +	      if (match_icase && MB_CUR_MAX > 1)
 +		/* Avoid kwset */
 +		offset = 0;
@ -184,8 +184,13 @@
 +#endif /* MBS_SUPPORT */
 	      while (beg > buf && beg[-1] != eol)
 		--beg;
- 	      if (kwsm.index < kwset_exact_matches)
+-	      if (kwsm.index < kwset_exact_matches)
-@@ -395,13 +397,47 @@
+	      if (!(match_icase && MB_CUR_MAX > 1) &&
 +		  (kwsm.index < kwset_exact_matches))
 		goto success_in_beg_and_end;
 	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
 		continue;
@@ -395,13 +398,47 @@
 	  else
 	    {
 	      /* No good fixed strings; start with DFA. */
@ -233,7 +238,7 @@
 	      while (beg > buf && beg[-1] != eol)
 		--beg;
 	    }
-@@ -469,15 +505,6 @@
+@@ -469,15 +506,6 @@
     } /* for (beg = end ..) */
  failure:
@ -249,7 +254,7 @@
   return (size_t) -1;
  success_in_beg_and_end:
-@@ -486,15 +513,6 @@
+@@ -486,15 +514,6 @@
   /* FALLTHROUGH */
  success_in_start_and_len:
@ -265,7 +270,7 @@
   *match_size = len;
   return start;
 }
-@@ -504,6 +522,7 @@
+@@ -504,6 +523,7 @@
 {
   char const *beg, *lim, *err;
@ -273,7 +278,7 @@
   kwsinit ();
   beg = pattern;
   do
-@@ -531,17 +550,8 @@
+@@ -531,17 +551,8 @@
   struct kwsmatch kwsmatch;
   size_t ret_val;
 #ifdef MBS_SUPPORT
@ -293,7 +298,7 @@
 #endif /* MBS_SUPPORT */
   for (beg = buf; beg <= buf + size; ++beg)
-@@ -550,8 +560,33 @@
+@@ -550,8 +561,33 @@
       if (offset == (size_t) -1)
 	goto failure;
 #ifdef MBS_SUPPORT
@ -329,7 +334,7 @@
 #endif /* MBS_SUPPORT */
       beg += offset;
       len = kwsmatch.size[0];
-@@ -587,6 +622,36 @@
+@@ -587,6 +623,36 @@
 	          if (offset == -1) {
 	            break; /* Try a different anchor. */
 	          }
@ -366,7 +371,7 @@
 	          beg += offset;
 	          len = kwsmatch.size[0];
 	        }
-@@ -597,19 +662,31 @@
+@@ -597,19 +663,31 @@
     }
  failure:
@ -406,7 +411,7 @@
   end++;
   while (buf < beg && beg[-1] != eol)
     --beg;
-@@ -618,15 +695,6 @@
+@@ -618,15 +696,6 @@
  success_in_beg_and_len:
   *match_size = len;
--- a/grep.spec
+++ b/grep.spec
@ -85,6 +85,10 @@ fi
 %{_mandir}/*/*
 %changelog
 * Thu Dec 16 2004 Tim Waugh <twaugh@redhat.com>
 - Bypass kwset matching when ignoring case and processing multibyte input
  (bug #143079).
 * Tue Dec 14 2004 Tim Waugh <twaugh@redhat.com> 2.5.1-42
 - Further UTF-8 processing avoided since a '\n' byte is always an
  end-of-line character in that encoding.