- New version: grep-2.6
- Added sigsegv patch (after release patch from upstream)
- Dropped obsoleted patches: fedora-tests, pcrewrap, case, egf-speedup,
    bz460641, utf8, dfa-optional, w
			
			
This commit is contained in:
		
							parent
							
								
									573565948a
								
							
						
					
					
						commit
						d30554e149
					
				| @ -1 +1 @@ | ||||
| grep-2.5.4.tar.bz2 | ||||
| grep-2.6.tar.xz | ||||
|  | ||||
| @ -1,48 +0,0 @@ | ||||
| From 68f09677b7aaedafa8c29280ccd76a034fe269f1 Mon Sep 17 00:00:00 2001 | ||||
| From: Debian <debian> | ||||
| Date: Sun, 23 Nov 2008 17:28:46 +0100 | ||||
| Subject: [PATCH] Case-insensitive list matching fix | ||||
| 
 | ||||
| This fixes case-insensitive matching of lists in multi-byte character sets. | ||||
| Original comment: | ||||
| 
 | ||||
| fix the following problem in multibyte locales. | ||||
|  % echo Y | egrep -i '[y]' | ||||
|  % | ||||
| 
 | ||||
| derived from gawk's dfa.c. | ||||
| 
 | ||||
| Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=123363 | ||||
| Debian: 61-dfa.c-case_fold-charclass.patch | ||||
| ---
 | ||||
|  src/dfa.c |   14 ++++++++++++++ | ||||
|  1 files changed, 14 insertions(+), 0 deletions(-) | ||||
| 
 | ||||
| diff --git a/src/dfa.c b/src/dfa.c
 | ||||
| index 934be97..088c379 100644
 | ||||
| --- a/src/dfa.c
 | ||||
| +++ b/src/dfa.c
 | ||||
| @@ -689,6 +689,20 @@ parse_bracket_exp_mb ()
 | ||||
|  	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, | ||||
|  			       work_mbc->nchars + 1); | ||||
|  	  work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; | ||||
| +	  if (case_fold && (iswlower((wint_t) wc) || iswupper((wint_t) wc)))
 | ||||
| +	    {
 | ||||
| +		wint_t altcase;
 | ||||
| +
 | ||||
| +		altcase = wc;		/* keeps compiler happy */
 | ||||
| +		if (iswlower((wint_t) wc))
 | ||||
| +		  altcase = towupper((wint_t) wc);
 | ||||
| +		else if (iswupper((wint_t) wc))
 | ||||
| +		  altcase = towlower((wint_t) wc);
 | ||||
| +
 | ||||
| +		REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
 | ||||
| +			       work_mbc->nchars + 1);
 | ||||
| +		work_mbc->chars[work_mbc->nchars++] = (wchar_t) altcase;
 | ||||
| +	    }
 | ||||
|  	} | ||||
|      } | ||||
|    while ((wc = wc1) != L']'); | ||||
| -- 
 | ||||
| 1.5.5.1 | ||||
| 
 | ||||
| @ -1,840 +0,0 @@ | ||||
| From aac37e1939632dbc7d2ade6f991af7ce103b0cba Mon Sep 17 00:00:00 2001 | ||||
| From: Tim Waugh <twaugh@redhat.com> | ||||
| Date: Sun, 23 Nov 2008 17:30:59 +0100 | ||||
| Subject: [PATCH] EGF Speedup | ||||
| 
 | ||||
| The full story behind this patch is that grep-2.5.1a does not handle UTF-8 gracefully at all. The basic plan with handling UTF-8 in 2.5.1a is: | ||||
| 
 | ||||
|     * whenever a buffer is parsed, go through the entire buffer deciding how many bytes make up each character | ||||
|     * use this information when necessary | ||||
| 
 | ||||
| This patch changes that to: | ||||
| 
 | ||||
|     * when information about how many bytes make up a character is needed, work it out on demand | ||||
| 
 | ||||
| On the face of it, this is a small, obvious improvement. In fact it is much better than that, because the original scheme would calculate character lengths several times for each buffer: one full pass for every single potential match! | ||||
| 
 | ||||
| For a full discussion of this patch, as well as dfa-optional, including benchmarking results, see the mailing list. | ||||
| 
 | ||||
| Upstream ticket: https://savannah.gnu.org/patch/?3803 | ||||
| Debian: 64-egf-speedup.patch | ||||
| Debian: 66-match_icase.patch | ||||
| ---
 | ||||
|  lib/posix/regex.h |    4 + | ||||
|  src/search.c      |  652 +++++++++++++++++++++++++++++++++++++++++++---------- | ||||
|  2 files changed, 535 insertions(+), 121 deletions(-) | ||||
| 
 | ||||
| diff --git a/lib/posix/regex.h b/lib/posix/regex.h
 | ||||
| index f4c4150..98df2cb 100644
 | ||||
| --- a/lib/posix/regex.h
 | ||||
| +++ b/lib/posix/regex.h
 | ||||
| @@ -165,6 +165,10 @@ typedef unsigned long int reg_syntax_t;
 | ||||
|     treated as 'a\{1'.  */ | ||||
|  #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) | ||||
|   | ||||
| +/* If this bit is set, then ignore case when matching.
 | ||||
| +   If not set, then case is significant.  */
 | ||||
| +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
 | ||||
| +
 | ||||
|  /* This global variable defines the particular regexp syntax to use (for | ||||
|     some interfaces).  When a regexp is compiled, the syntax used is | ||||
|     stored in the pattern buffer, so changing this does not affect | ||||
| diff --git a/src/search.c b/src/search.c
 | ||||
| index 7f5f187..9691fb8 100644
 | ||||
| --- a/src/search.c
 | ||||
| +++ b/src/search.c
 | ||||
| @@ -18,10 +18,15 @@
 | ||||
|   | ||||
|  /* Written August 1992 by Mike Haertel. */ | ||||
|   | ||||
| +#ifndef _GNU_SOURCE
 | ||||
| +# define _GNU_SOURCE 1
 | ||||
| +#endif
 | ||||
|  #ifdef HAVE_CONFIG_H | ||||
|  # include <config.h> | ||||
|  #endif | ||||
|   | ||||
| +#include <assert.h>
 | ||||
| +
 | ||||
|  #include <sys/types.h> | ||||
|   | ||||
|  #include "mbsupport.h" | ||||
| @@ -43,6 +48,9 @@
 | ||||
|  #ifdef HAVE_LIBPCRE | ||||
|  # include <pcre.h> | ||||
|  #endif | ||||
| +#ifdef HAVE_LANGINFO_CODESET
 | ||||
| +# include <langinfo.h>
 | ||||
| +#endif
 | ||||
|   | ||||
|  #define NCHAR (UCHAR_MAX + 1) | ||||
|   | ||||
| @@ -68,6 +76,19 @@ kwsinit (void)
 | ||||
|      error (2, 0, _("memory exhausted")); | ||||
|  } | ||||
|   | ||||
| +/* UTF-8 encoding allows some optimizations that we can't otherwise
 | ||||
| +   assume in a multibyte encoding. */
 | ||||
| +static int using_utf8;
 | ||||
| +
 | ||||
| +void
 | ||||
| +check_utf8 (void)
 | ||||
| +{
 | ||||
| +#ifdef HAVE_LANGINFO_CODESET
 | ||||
| +  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
 | ||||
| +    using_utf8 = 1;
 | ||||
| +#endif
 | ||||
| +}
 | ||||
| +
 | ||||
|  #ifndef FGREP_PROGRAM | ||||
|  /* DFA compiled regexp. */ | ||||
|  static struct dfa dfa; | ||||
| @@ -134,49 +155,6 @@ kwsmusts (void)
 | ||||
|  } | ||||
|  #endif /* !FGREP_PROGRAM */ | ||||
|   | ||||
| -#ifdef MBS_SUPPORT
 | ||||
| -/* This function allocate the array which correspond to "buf".
 | ||||
| -   Then this check multibyte string and mark on the positions which
 | ||||
| -   are not single byte character nor the first byte of a multibyte
 | ||||
| -   character.  Caller must free the array.  */
 | ||||
| -static char*
 | ||||
| -check_multibyte_string(char const *buf, size_t size)
 | ||||
| -{
 | ||||
| -  char *mb_properties = xmalloc(size);
 | ||||
| -  mbstate_t cur_state;
 | ||||
| -  wchar_t wc;
 | ||||
| -  int i;
 | ||||
| -
 | ||||
| -  memset(&cur_state, 0, sizeof(mbstate_t));
 | ||||
| -  memset(mb_properties, 0, sizeof(char)*size);
 | ||||
| -
 | ||||
| -  for (i = 0; i < size ;)
 | ||||
| -    {
 | ||||
| -      size_t mbclen;
 | ||||
| -      mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
 | ||||
| -
 | ||||
| -      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
 | ||||
| -	{
 | ||||
| -	  /* An invalid sequence, or a truncated multibyte character.
 | ||||
| -	     We treat it as a single byte character.  */
 | ||||
| -	  mbclen = 1;
 | ||||
| -	}
 | ||||
| -      else if (match_icase)
 | ||||
| -	{
 | ||||
| -	  if (iswupper((wint_t)wc))
 | ||||
| -	    {
 | ||||
| -	      wc = towlower((wint_t)wc);
 | ||||
| -	      wcrtomb(buf + i, wc, &cur_state);
 | ||||
| -	    }
 | ||||
| -	}
 | ||||
| -      mb_properties[i] = mbclen;
 | ||||
| -      i += mbclen;
 | ||||
| -    }
 | ||||
| -
 | ||||
| -  return mb_properties;
 | ||||
| -}
 | ||||
| -#endif /* MBS_SUPPORT */
 | ||||
| -
 | ||||
|  #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) | ||||
|  #ifdef EGREP_PROGRAM | ||||
|  COMPILE_FCT(Ecompile) | ||||
| @@ -193,10 +171,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
 | ||||
|    size_t total = size; | ||||
|    char const *motif = pattern; | ||||
|   | ||||
| -#if 0
 | ||||
| +  check_utf8 ();
 | ||||
|    if (match_icase) | ||||
|      syntax_bits |= RE_ICASE; | ||||
| -#endif
 | ||||
|    re_set_syntax (syntax_bits); | ||||
|    dfasyntax (syntax_bits, match_icase, eolbyte); | ||||
|   | ||||
| @@ -303,20 +280,9 @@ EXECUTE_FCT(EGexecute)
 | ||||
|    struct kwsmatch kwsm; | ||||
|    size_t i, ret_val; | ||||
|  #ifdef MBS_SUPPORT | ||||
| -  char *mb_properties = NULL;
 | ||||
| -  if (MB_CUR_MAX > 1)
 | ||||
| -    {
 | ||||
| -      if (match_icase)
 | ||||
| -        {
 | ||||
| -          char *case_buf = xmalloc(size);
 | ||||
| -          memcpy(case_buf, buf, size);
 | ||||
| -	  if (start_ptr)
 | ||||
| -	    start_ptr = case_buf + (start_ptr - buf);
 | ||||
| -          buf = case_buf;
 | ||||
| -        }
 | ||||
| -      if (kwset)
 | ||||
| -        mb_properties = check_multibyte_string(buf, size);
 | ||||
| -    }
 | ||||
| +  int mb_cur_max = MB_CUR_MAX;
 | ||||
| +  mbstate_t mbs;
 | ||||
| +  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|   | ||||
|    buflim = buf + size; | ||||
| @@ -329,21 +295,63 @@ EXECUTE_FCT(EGexecute)
 | ||||
|  	  if (kwset) | ||||
|  	    { | ||||
|  	      /* Find a possible match using the KWset matcher. */ | ||||
| -	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      size_t bytes_left = 0;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +	      size_t offset;
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      /* kwsexec doesn't work with match_icase and multibyte input. */
 | ||||
| +	      if (match_icase && mb_cur_max > 1)
 | ||||
| +		/* Avoid kwset */
 | ||||
| +		offset = 0;
 | ||||
| +	      else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +	      offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
 | ||||
|  	      if (offset == (size_t) -1) | ||||
| -		goto failure;
 | ||||
| +		return (size_t)-1;
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      if (mb_cur_max > 1 && !using_utf8)
 | ||||
| +		{
 | ||||
| +		  bytes_left = offset;
 | ||||
| +		  while (bytes_left)
 | ||||
| +		    {
 | ||||
| +		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
 | ||||
| +		      if (mlen == (size_t) -1 || mlen == 0)
 | ||||
| +			{
 | ||||
| +			  /* Incomplete character: treat as single-byte. */
 | ||||
| +			  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +			  beg++;
 | ||||
| +			  bytes_left--;
 | ||||
| +			  continue;
 | ||||
| +			}
 | ||||
| +
 | ||||
| +		      if (mlen == (size_t) -2)
 | ||||
| +			/* Offset points inside multibyte character:
 | ||||
| +			 * no good. */
 | ||||
| +			break;
 | ||||
| +
 | ||||
| +		      beg += mlen;
 | ||||
| +		      bytes_left -= mlen;
 | ||||
| +		    }
 | ||||
| +		}
 | ||||
| +	      else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
|  	      beg += offset; | ||||
|  	      /* Narrow down to the line containing the candidate, and | ||||
|  		 run it through DFA. */ | ||||
|  	      end = memchr(beg, eol, buflim - beg); | ||||
|  	      end++; | ||||
|  #ifdef MBS_SUPPORT | ||||
| -	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
 | ||||
| +	      if (mb_cur_max > 1 && bytes_left)
 | ||||
|  		continue; | ||||
|  #endif | ||||
|  	      while (beg > buf && beg[-1] != eol) | ||||
|  		--beg; | ||||
| -	      if (kwsm.index < kwset_exact_matches)
 | ||||
| +	      if (
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +		  !(match_icase && mb_cur_max > 1) &&
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +		  (kwsm.index < kwset_exact_matches))
 | ||||
|  		goto success; | ||||
|  	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) | ||||
|  		continue; | ||||
| @@ -351,13 +359,47 @@ EXECUTE_FCT(EGexecute)
 | ||||
|  	  else | ||||
|  	    { | ||||
|  	      /* No good fixed strings; start with DFA. */ | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      size_t bytes_left = 0;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
|  	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); | ||||
|  	      if (offset == (size_t) -1) | ||||
|  		break; | ||||
|  	      /* Narrow down to the line we've found. */ | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      if (mb_cur_max > 1 && !using_utf8)
 | ||||
| +		{
 | ||||
| +		  bytes_left = offset;
 | ||||
| +		  while (bytes_left)
 | ||||
| +		    {
 | ||||
| +		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
 | ||||
| +		      if (mlen == (size_t) -1 || mlen == 0)
 | ||||
| +			{
 | ||||
| +			  /* Incomplete character: treat as single-byte. */
 | ||||
| +			  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +			  beg++;
 | ||||
| +			  bytes_left--;
 | ||||
| +			  continue;
 | ||||
| +			}
 | ||||
| +
 | ||||
| +		      if (mlen == (size_t) -2)
 | ||||
| +			/* Offset points inside multibyte character:
 | ||||
| +			 * no good. */
 | ||||
| +			break;
 | ||||
| +
 | ||||
| +		      beg += mlen;
 | ||||
| +		      bytes_left -= mlen;
 | ||||
| +		    }
 | ||||
| +		}
 | ||||
| +	      else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
|  	      beg += offset; | ||||
|  	      end = memchr (beg, eol, buflim - beg); | ||||
|  	      end++; | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      if (mb_cur_max > 1 && bytes_left)
 | ||||
| +		continue;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
|  	      while (beg > buf && beg[-1] != eol) | ||||
|  		--beg; | ||||
|  	    } | ||||
| @@ -475,24 +517,144 @@ EXECUTE_FCT(EGexecute)
 | ||||
|    *match_size = len; | ||||
|    ret_val = beg - buf; | ||||
|   out: | ||||
| -#ifdef MBS_SUPPORT
 | ||||
| -  if (MB_CUR_MAX > 1)
 | ||||
| -    {
 | ||||
| -      if (match_icase)
 | ||||
| -        free((char*)buf);
 | ||||
| -      if (mb_properties)
 | ||||
| -        free(mb_properties);
 | ||||
| -    }
 | ||||
| -#endif /* MBS_SUPPORT */
 | ||||
|    return ret_val; | ||||
|  } | ||||
|  #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ | ||||
|   | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +static int f_i_multibyte; /* whether we're using the new -Fi MB method */
 | ||||
| +static struct
 | ||||
| +{
 | ||||
| +  wchar_t **patterns;
 | ||||
| +  size_t count, maxlen;
 | ||||
| +  unsigned char *match;
 | ||||
| +} Fimb;
 | ||||
| +#endif
 | ||||
| +
 | ||||
|  #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) | ||||
|  COMPILE_FCT(Fcompile) | ||||
|  { | ||||
| +  int mb_cur_max = MB_CUR_MAX;
 | ||||
|    char const *beg, *lim, *err; | ||||
|   | ||||
| +  check_utf8 ();
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +  /* Support -F -i for UTF-8 input. */
 | ||||
| +  if (match_icase && mb_cur_max > 1)
 | ||||
| +    {
 | ||||
| +      mbstate_t mbs;
 | ||||
| +      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
 | ||||
| +      const char *patternend = pattern;
 | ||||
| +      size_t wcsize;
 | ||||
| +      kwset_t fimb_kwset = NULL;
 | ||||
| +      char *starts = NULL;
 | ||||
| +      wchar_t *wcbeg, *wclim;
 | ||||
| +      size_t allocated = 0;
 | ||||
| +
 | ||||
| +      memset (&mbs, '\0', sizeof (mbs));
 | ||||
| +# ifdef __GNU_LIBRARY__
 | ||||
| +      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
 | ||||
| +      if (patternend != pattern + size)
 | ||||
| +	wcsize = (size_t) -1;
 | ||||
| +# else
 | ||||
| +      {
 | ||||
| +	char *patterncopy = xmalloc (size + 1);
 | ||||
| +
 | ||||
| +	memcpy (patterncopy, pattern, size);
 | ||||
| +	patterncopy[size] = '\0';
 | ||||
| +	patternend = patterncopy;
 | ||||
| +	wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
 | ||||
| +	if (patternend != patterncopy + size)
 | ||||
| +	  wcsize = (size_t) -1;
 | ||||
| +	free (patterncopy);
 | ||||
| +      }
 | ||||
| +# endif
 | ||||
| +      if (wcsize + 2 <= 2)
 | ||||
| +	{
 | ||||
| +fimb_fail:
 | ||||
| +	  free (wcpattern);
 | ||||
| +	  free (starts);
 | ||||
| +	  if (fimb_kwset)
 | ||||
| +	    kwsfree (fimb_kwset);
 | ||||
| +	  free (Fimb.patterns);
 | ||||
| +	  Fimb.patterns = NULL;
 | ||||
| +	}
 | ||||
| +      else
 | ||||
| +	{
 | ||||
| +	  if (!(fimb_kwset = kwsalloc (NULL)))
 | ||||
| +	    error (2, 0, _("memory exhausted"));
 | ||||
| +
 | ||||
| +	  starts = xmalloc (mb_cur_max * 3);
 | ||||
| +	  wcbeg = wcpattern;
 | ||||
| +	  do
 | ||||
| +	    {
 | ||||
| +	      int i;
 | ||||
| +	      size_t wclen;
 | ||||
| +
 | ||||
| +	      if (Fimb.count >= allocated)
 | ||||
| +		{
 | ||||
| +		  if (allocated == 0)
 | ||||
| +		    allocated = 128;
 | ||||
| +		  else
 | ||||
| +		    allocated *= 2;
 | ||||
| +		  Fimb.patterns = xrealloc (Fimb.patterns,
 | ||||
| +					    sizeof (wchar_t *) * allocated);
 | ||||
| +		}
 | ||||
| +	      Fimb.patterns[Fimb.count++] = wcbeg;
 | ||||
| +	      for (wclim = wcbeg;
 | ||||
| +		   wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
 | ||||
| +		*wclim = towlower (*wclim);
 | ||||
| +	      *wclim = L'\0';
 | ||||
| +	      wclen = wclim - wcbeg;
 | ||||
| +	      if (wclen > Fimb.maxlen)
 | ||||
| +		Fimb.maxlen = wclen;
 | ||||
| +	      if (wclen > 3)
 | ||||
| +		wclen = 3;
 | ||||
| +	      if (wclen == 0)
 | ||||
| +		{
 | ||||
| +		  if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
 | ||||
| +		    error (2, 0, err);
 | ||||
| +		}
 | ||||
| +	      else
 | ||||
| +		for (i = 0; i < (1 << wclen); i++)
 | ||||
| +		  {
 | ||||
| +		    char *p = starts;
 | ||||
| +		    int j, k;
 | ||||
| +
 | ||||
| +		    for (j = 0; j < wclen; ++j)
 | ||||
| +		      {
 | ||||
| +			wchar_t wc = wcbeg[j];
 | ||||
| +			if (i & (1 << j))
 | ||||
| +			  {
 | ||||
| +			    wc = towupper (wc);
 | ||||
| +			    if (wc == wcbeg[j])
 | ||||
| +			      continue;
 | ||||
| +			  }
 | ||||
| +			k = wctomb (p, wc);
 | ||||
| +			if (k <= 0)
 | ||||
| +			  goto fimb_fail;
 | ||||
| +			p += k;
 | ||||
| +		      }
 | ||||
| +		    if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
 | ||||
| +		      error (2, 0, err);
 | ||||
| +		  }
 | ||||
| +	      if (wclim < wcpattern + wcsize)
 | ||||
| +		++wclim;
 | ||||
| +	      wcbeg = wclim;
 | ||||
| +	    }
 | ||||
| +	  while (wcbeg < wcpattern + wcsize);
 | ||||
| +	  f_i_multibyte = 1;
 | ||||
| +	  kwset = fimb_kwset;
 | ||||
| +	  free (starts);
 | ||||
| +	  Fimb.match = xmalloc (Fimb.count);
 | ||||
| +	  if ((err = kwsprep (kwset)) != 0)
 | ||||
| +	    error (2, 0, err);
 | ||||
| +	  return;
 | ||||
| +	}
 | ||||
| +    }
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +
 | ||||
| +
 | ||||
|    kwsinit (); | ||||
|    beg = pattern; | ||||
|    do | ||||
| @@ -511,6 +673,76 @@ COMPILE_FCT(Fcompile)
 | ||||
|      error (2, 0, err); | ||||
|  } | ||||
|   | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +static int
 | ||||
| +Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
 | ||||
| +{
 | ||||
| +  size_t len, letter, i;
 | ||||
| +  int ret = -1;
 | ||||
| +  mbstate_t mbs;
 | ||||
| +  wchar_t wc;
 | ||||
| +  int patterns_left;
 | ||||
| +
 | ||||
| +  assert (match_icase && f_i_multibyte == 1);
 | ||||
| +  assert (MB_CUR_MAX > 1);
 | ||||
| +
 | ||||
| +  memset (&mbs, '\0', sizeof (mbs));
 | ||||
| +  memset (Fimb.match, '\1', Fimb.count);
 | ||||
| +  letter = len = 0;
 | ||||
| +  patterns_left = 1;
 | ||||
| +  while (patterns_left && len <= size)
 | ||||
| +    {
 | ||||
| +      size_t c;
 | ||||
| +
 | ||||
| +      patterns_left = 0;
 | ||||
| +      if (len < size)
 | ||||
| +	{
 | ||||
| +	  c = mbrtowc (&wc, buf + len, size - len, &mbs);
 | ||||
| +	  if (c + 2 <= 2)
 | ||||
| +	    return ret;
 | ||||
| +
 | ||||
| +	  wc = towlower (wc);
 | ||||
| +	}
 | ||||
| +      else
 | ||||
| +	{
 | ||||
| +	  c = 1;
 | ||||
| +	  wc = L'\0';
 | ||||
| +	}
 | ||||
| +
 | ||||
| +      for (i = 0; i < Fimb.count; i++)
 | ||||
| +	{
 | ||||
| +	  if (Fimb.match[i])
 | ||||
| +	    {
 | ||||
| +	      if (Fimb.patterns[i][letter] == L'\0')
 | ||||
| +		{
 | ||||
| +		  /* Found a match. */
 | ||||
| +		  *plen = len;
 | ||||
| +		  if (!exact && !match_words)
 | ||||
| +		    return 0;
 | ||||
| +		  else
 | ||||
| +		    {
 | ||||
| +		      /* For -w or exact look for longest match.  */
 | ||||
| +		      ret = 0;
 | ||||
| +		      Fimb.match[i] = '\0';
 | ||||
| +		      continue;
 | ||||
| +		    }
 | ||||
| +		}
 | ||||
| +
 | ||||
| +	      if (Fimb.patterns[i][letter] == wc)
 | ||||
| +		patterns_left = 1;
 | ||||
| +	      else
 | ||||
| +		Fimb.match[i] = '\0';
 | ||||
| +	    }
 | ||||
| +	}
 | ||||
| +
 | ||||
| +      len += c;
 | ||||
| +      letter++;
 | ||||
| +    }
 | ||||
| +
 | ||||
| +  return ret;
 | ||||
| +}
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +
 | ||||
|  EXECUTE_FCT(Fexecute) | ||||
|  { | ||||
|    register char const *beg, *try, *end; | ||||
| @@ -519,69 +751,256 @@ EXECUTE_FCT(Fexecute)
 | ||||
|    struct kwsmatch kwsmatch; | ||||
|    size_t ret_val; | ||||
|  #ifdef MBS_SUPPORT | ||||
| -  char *mb_properties = NULL;
 | ||||
| -  if (MB_CUR_MAX > 1)
 | ||||
| -    {
 | ||||
| -      if (match_icase)
 | ||||
| -        {
 | ||||
| -          char *case_buf = xmalloc(size);
 | ||||
| -          memcpy(case_buf, buf, size);
 | ||||
| -	  if (start_ptr)
 | ||||
| -	    start_ptr = case_buf + (start_ptr - buf);
 | ||||
| -          buf = case_buf;
 | ||||
| -        }
 | ||||
| -      mb_properties = check_multibyte_string(buf, size);
 | ||||
| -    }
 | ||||
| +  int mb_cur_max = MB_CUR_MAX;
 | ||||
| +  mbstate_t mbs;
 | ||||
| +  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +  const char *last_char = NULL;
 | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|   | ||||
|    for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) | ||||
|      { | ||||
|        size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); | ||||
|        if (offset == (size_t) -1) | ||||
| -	goto failure;
 | ||||
| +	return offset;
 | ||||
|  #ifdef MBS_SUPPORT | ||||
| -      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
 | ||||
| -	continue; /* It is a part of multibyte character.  */
 | ||||
| +      if (mb_cur_max > 1 && !using_utf8)
 | ||||
| +	{
 | ||||
| +	  size_t bytes_left = offset;
 | ||||
| +	  while (bytes_left)
 | ||||
| +	    {
 | ||||
| +	      size_t mlen = mbrlen (beg, bytes_left, &mbs);
 | ||||
| +
 | ||||
| +	      last_char = beg;
 | ||||
| +	      if (mlen == (size_t) -1 || mlen == 0)
 | ||||
| +		{
 | ||||
| +		  /* Incomplete character: treat as single-byte. */
 | ||||
| +		  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +		  beg++;
 | ||||
| +		  bytes_left--;
 | ||||
| +		  continue;
 | ||||
| +		}
 | ||||
| +
 | ||||
| +	      if (mlen == (size_t) -2)
 | ||||
| +		/* Offset points inside multibyte character: no good. */
 | ||||
| +		break;
 | ||||
| +
 | ||||
| +	      beg += mlen;
 | ||||
| +	      bytes_left -= mlen;
 | ||||
| +	    }
 | ||||
| +
 | ||||
| +	  if (bytes_left)
 | ||||
| +	    continue;
 | ||||
| +	}
 | ||||
| +      else
 | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|        beg += offset; | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +      /* For f_i_multibyte, the string at beg now matches first 3 chars of
 | ||||
| +	 one of the search strings (less if there are shorter search strings).
 | ||||
| +	 See if this is a real match.  */
 | ||||
| +      if (f_i_multibyte
 | ||||
| +	  && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL))
 | ||||
| +	goto next_char;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
|        len = kwsmatch.size[0]; | ||||
|        if (start_ptr && !match_words) | ||||
|  	goto success_in_beg_and_len; | ||||
|        if (match_lines) | ||||
|  	{ | ||||
|  	  if (beg > buf && beg[-1] != eol) | ||||
| -	    continue;
 | ||||
| +	    goto next_char;
 | ||||
|  	  if (beg + len < buf + size && beg[len] != eol) | ||||
| -	    continue;
 | ||||
| +	    goto next_char;
 | ||||
|  	  goto success; | ||||
|  	} | ||||
|        else if (match_words) | ||||
| -	for (try = beg; len; )
 | ||||
| -	  {
 | ||||
| -	    if (try > buf && WCHAR((unsigned char) try[-1]))
 | ||||
| -	      break;
 | ||||
| -	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
 | ||||
| -	      {
 | ||||
| -		offset = kwsexec (kwset, beg, --len, &kwsmatch);
 | ||||
| -		if (offset == (size_t) -1)
 | ||||
| -		  break;
 | ||||
| -		try = beg + offset;
 | ||||
| -		len = kwsmatch.size[0];
 | ||||
| -	      }
 | ||||
| -	    else if (!start_ptr)
 | ||||
| -	      goto success;
 | ||||
| -	    else
 | ||||
| -	      goto success_in_beg_and_len;
 | ||||
| -	  } /* for (try) */
 | ||||
| -      else
 | ||||
| +	{
 | ||||
| +	  while (len)
 | ||||
| +	    {
 | ||||
| +	      int word_match = 0;
 | ||||
| +	      if (beg > buf)
 | ||||
| +		{
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +		  if (mb_cur_max > 1)
 | ||||
| +		    {
 | ||||
| +		      const char *s;
 | ||||
| +		      int mr;
 | ||||
| +		      wchar_t pwc;
 | ||||
| +
 | ||||
| +		      if (using_utf8)
 | ||||
| +			{
 | ||||
| +			  s = beg - 1;
 | ||||
| +			  while (s > buf
 | ||||
| +				 && (unsigned char) *s >= 0x80
 | ||||
| +				 && (unsigned char) *s <= 0xbf)
 | ||||
| +			    --s;
 | ||||
| +			}
 | ||||
| +		      else
 | ||||
| +			s = last_char;
 | ||||
| +		      mr = mbtowc (&pwc, s, beg - s);
 | ||||
| +		      if (mr <= 0)
 | ||||
| +			memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +		      else if ((iswalnum (pwc) || pwc == L'_')
 | ||||
| +			       && mr == (int) (beg - s))
 | ||||
| +			goto next_char;
 | ||||
| +		    }
 | ||||
| +		  else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +		  if (WCHAR ((unsigned char) beg[-1]))
 | ||||
| +		    goto next_char;
 | ||||
| +		}
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	      if (mb_cur_max > 1)
 | ||||
| +		{
 | ||||
| +		  wchar_t nwc;
 | ||||
| +		  int mr;
 | ||||
| +
 | ||||
| +		  mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
 | ||||
| +		  if (mr <= 0)
 | ||||
| +		    {
 | ||||
| +		      memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +		      word_match = 1;
 | ||||
| +		    }
 | ||||
| +		  else if (!iswalnum (nwc) && nwc != L'_')
 | ||||
| +		    word_match = 1;
 | ||||
| +		}
 | ||||
| +	      else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +		if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
 | ||||
| +		  word_match = 1;
 | ||||
| +	      if (word_match)
 | ||||
| +		{
 | ||||
| +		  if (start_ptr == NULL)
 | ||||
| +		    /* Returns the whole line now we know there's a word match. */
 | ||||
| +		    goto success;
 | ||||
| +		  else {
 | ||||
| +		    /* Returns just this word match. */
 | ||||
| +		    *match_size = len;
 | ||||
| +		    return beg - buf;
 | ||||
| +		  }
 | ||||
| +		}
 | ||||
| +	      if (len > 0)
 | ||||
| +		{
 | ||||
| +		  /* Try a shorter length anchored at the same place. */
 | ||||
| +		  --len;
 | ||||
| +		  offset = kwsexec (kwset, beg, len, &kwsmatch);
 | ||||
| +
 | ||||
| +		  if (offset == -1)
 | ||||
| +		    goto next_char; /* Try a different anchor. */
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +
 | ||||
| +		  if (mb_cur_max > 1 && !using_utf8)
 | ||||
| +		    {
 | ||||
| +		      size_t bytes_left = offset;
 | ||||
| +		      while (bytes_left)
 | ||||
| +			{
 | ||||
| +			  size_t mlen = mbrlen (beg, bytes_left, &mbs);
 | ||||
| +
 | ||||
| +			  last_char = beg;
 | ||||
| +			  if (mlen == (size_t) -1 || mlen == 0)
 | ||||
| +			    {
 | ||||
| +			      /* Incomplete character: treat as single-byte. */
 | ||||
| +			      memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +			      beg++;
 | ||||
| +			      bytes_left--;
 | ||||
| +			      continue;
 | ||||
| +			    }
 | ||||
| +
 | ||||
| +			  if (mlen == (size_t) -2)
 | ||||
| +			    {
 | ||||
| +			      /* Offset points inside multibyte character:
 | ||||
| +			       * no good. */
 | ||||
| +			      break;
 | ||||
| +			    }
 | ||||
| +
 | ||||
| +			  beg += mlen;
 | ||||
| +			  bytes_left -= mlen;
 | ||||
| +			}
 | ||||
| +
 | ||||
| +		      if (bytes_left)
 | ||||
| +			{
 | ||||
| +			  memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +			  goto next_char; /* Try a different anchor. */
 | ||||
| +			}
 | ||||
| +		    }
 | ||||
| +		  else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +		  beg += offset;
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +		  /* The string at beg now matches first 3 chars of one of
 | ||||
| +		     the search strings (less if there are shorter search
 | ||||
| +		     strings).  See if this is a real match.  */
 | ||||
| +		  if (f_i_multibyte
 | ||||
| +		      && Fimbexec (beg, len - offset, &kwsmatch.size[0],
 | ||||
| +				   start_ptr == NULL))
 | ||||
| +		    goto next_char;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +		  len = kwsmatch.size[0];
 | ||||
| +		}
 | ||||
| +	    }
 | ||||
| +	}
 | ||||
| +       else
 | ||||
|  	goto success; | ||||
| -    } /* for (beg in buf) */
 | ||||
| +next_char:;
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +      /* Advance to next character.  For MB_CUR_MAX == 1 case this is handled
 | ||||
| +	 by ++beg above.  */
 | ||||
| +      if (mb_cur_max > 1)
 | ||||
| +	{
 | ||||
| +	  if (using_utf8)
 | ||||
| +	    {
 | ||||
| +	      unsigned char c = *beg;
 | ||||
| +	      if (c >= 0xc2)
 | ||||
| +		{
 | ||||
| +		  if (c < 0xe0)
 | ||||
| +		    ++beg;
 | ||||
| +		  else if (c < 0xf0)
 | ||||
| +		    beg += 2;
 | ||||
| +		  else if (c < 0xf8)
 | ||||
| +		    beg += 3;
 | ||||
| +		  else if (c < 0xfc)
 | ||||
| +		    beg += 4;
 | ||||
| +		  else if (c < 0xfe)
 | ||||
| +		    beg += 5;
 | ||||
| +		}
 | ||||
| +	    }
 | ||||
| +	  else
 | ||||
| +	    {
 | ||||
| +	      size_t l = mbrlen (beg, buf + size - beg, &mbs);
 | ||||
|   | ||||
| - failure:
 | ||||
| -  ret_val = -1;
 | ||||
| -  goto out;
 | ||||
| +	      last_char = beg;
 | ||||
| +	      if (l + 2 >= 2)
 | ||||
| +		beg += l - 1;
 | ||||
| +	      else
 | ||||
| +		memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +	    }
 | ||||
| +	}
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +    }
 | ||||
| +
 | ||||
| +  return -1;
 | ||||
|   | ||||
|   success: | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +  if (mb_cur_max > 1 && !using_utf8)
 | ||||
| +    {
 | ||||
| +      end = beg + len;
 | ||||
| +      while (end < buf + size)
 | ||||
| +	{
 | ||||
| +	  size_t mlen = mbrlen (end, buf + size - end, &mbs);
 | ||||
| +	  if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
 | ||||
| +	    {
 | ||||
| +	      memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +	      mlen = 1;
 | ||||
| +	    }
 | ||||
| +	  if (mlen == 1 && *end == eol)
 | ||||
| +	    break;
 | ||||
| +
 | ||||
| +	  end += mlen;
 | ||||
| +	}
 | ||||
| +     }
 | ||||
| +  else
 | ||||
| + #endif /* MBS_SUPPORT */
 | ||||
|    end = memchr (beg + len, eol, (buf + size) - (beg + len)); | ||||
|    end++; | ||||
|    while (buf < beg && beg[-1] != eol) | ||||
| @@ -591,15 +1010,6 @@ EXECUTE_FCT(Fexecute)
 | ||||
|    *match_size = len; | ||||
|    ret_val = beg - buf; | ||||
|   out: | ||||
| -#ifdef MBS_SUPPORT
 | ||||
| -  if (MB_CUR_MAX > 1)
 | ||||
| -    {
 | ||||
| -      if (match_icase)
 | ||||
| -        free((char*)buf);
 | ||||
| -      if (mb_properties)
 | ||||
| -        free(mb_properties);
 | ||||
| -    }
 | ||||
| -#endif /* MBS_SUPPORT */
 | ||||
|    return ret_val; | ||||
|  } | ||||
|  #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ | ||||
| -- 
 | ||||
| 1.5.5.1 | ||||
| 
 | ||||
| @ -1,146 +0,0 @@ | ||||
| From aad5428ee639eb9c462097022a0954e8a2bcfb90 Mon Sep 17 00:00:00 2001 | ||||
| From: Lubomir Rintel <lkundrak@v3.sk> | ||||
| Date: Sun, 23 Nov 2008 17:24:13 +0100 | ||||
| Subject: [PATCH] Fedora Grep regression test suite | ||||
| 
 | ||||
| ---
 | ||||
|  tests/Makefile.am |    2 +- | ||||
|  tests/fedora.sh   |  113 +++++++++++++++++++++++++++++++++++++++++++++++++++++ | ||||
|  2 files changed, 114 insertions(+), 1 deletions(-) | ||||
|  create mode 100644 tests/fedora.sh | ||||
| 
 | ||||
| diff --git a/tests/Makefile.am b/tests/Makefile.am
 | ||||
| index 6ac60e6..02f30ac 100644
 | ||||
| --- a/tests/Makefile.am
 | ||||
| +++ b/tests/Makefile.am
 | ||||
| @@ -4,7 +4,7 @@ AWK=@AWK@
 | ||||
|   | ||||
|  TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh pcre.sh \ | ||||
|          status.sh empty.sh options.sh backref.sh file.sh foad1.sh \ | ||||
| -        fmbtest.sh yesno.sh
 | ||||
| +        fmbtest.sh yesno.sh fedora.sh
 | ||||
|  EXTRA_DIST = $(TESTS) \ | ||||
|               khadafy.lines khadafy.regexp \ | ||||
|               spencer1.awk spencer1.tests \ | ||||
| diff --git a/tests/fedora.sh b/tests/fedora.sh
 | ||||
| new file mode 100644 | ||||
| index 0000000..9a140b7
 | ||||
| --- /dev/null
 | ||||
| +++ b/tests/fedora.sh
 | ||||
| @@ -0,0 +1,113 @@
 | ||||
| +#!/bin/bash
 | ||||
| +
 | ||||
| +# GREP Regression test suite for Fedora bugs and fixes
 | ||||
| +# (c) 2008 Lubomir Rintel
 | ||||
| +# Licensed under the same terms as GNU Grep itself
 | ||||
| +
 | ||||
| +if [ -t 1 ]
 | ||||
| +then
 | ||||
| +	# Colored output on terminal
 | ||||
| +	G='\033[32m'
 | ||||
| +	R='\033[31m'
 | ||||
| +	D='\033[0m'
 | ||||
| +fi
 | ||||
| +
 | ||||
| +ok ()	{ echo -e "${G}OK${D}"; }
 | ||||
| +fail () { echo -e "${R}FAIL${D} (See ${U})"; failures=1; }
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=116909
 | ||||
| +echo -n "fgrep false negatives: "
 | ||||
| +diff <(${GREP} -F -w -f \
 | ||||
| +<(cat <<EOF
 | ||||
| +a
 | ||||
| +b
 | ||||
| +c
 | ||||
| +EOF
 | ||||
| +) \
 | ||||
| +<(cat <<EOF
 | ||||
| +a
 | ||||
| +barn
 | ||||
| +c
 | ||||
| +EOF
 | ||||
| +)) \
 | ||||
| +<(cat <<EOF
 | ||||
| +a
 | ||||
| +c
 | ||||
| +EOF
 | ||||
| +) && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=123362
 | ||||
| +echo -n "bad handling of brackets in UTF-8: "
 | ||||
| +diff <(echo Y | LC_ALL=de_DE.UTF-8 ${GREP} -i '[y,Y]') \
 | ||||
| +<(cat <<EOF
 | ||||
| +Y
 | ||||
| +EOF
 | ||||
| +) && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=112869
 | ||||
| +echo -n "crash with \W: "
 | ||||
| +diff <(echo '<form>' | LANG=it_IT ${GREP} -iE '\Wform\W') \
 | ||||
| +<(cat <<EOF
 | ||||
| +<form>
 | ||||
| +EOF
 | ||||
| +) && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=189580
 | ||||
| +echo -n "grep -D skip opening a special file: "
 | ||||
| +${GREP} -D skip foo /dev/zero &
 | ||||
| +sleep 1
 | ||||
| +kill $! 2>/dev/null && fail || ok
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=169524
 | ||||
| +echo -n "grep -Fw looping infinitely: "
 | ||||
| +echo foobar | ${GREP} -Fw "" &
 | ||||
| +sleep 1
 | ||||
| +kill $! 2>/dev/null && fail || ok
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=140781
 | ||||
| +echo -n "fgrep hangs on binary files: "
 | ||||
| +${GREP} -F grep $(which ${GREP}) >/dev/null &
 | ||||
| +sleep 1
 | ||||
| +kill $! 2>/dev/null && fail || ok
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=161700
 | ||||
| +echo -n "grep -Fw fails to match anything: "
 | ||||
| +diff <(${GREP} -Fw test <(cat <<EOF
 | ||||
| +test
 | ||||
| +EOF
 | ||||
| +)) \
 | ||||
| +<(cat <<EOF
 | ||||
| +test
 | ||||
| +EOF
 | ||||
| +) && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=179698
 | ||||
| +echo -n "grep -w broken in non-utf8 multibyte locales"
 | ||||
| +diff <(echo za a | LANG=ja_JP.eucjp grep -w a) \
 | ||||
| +<(cat <<EOF
 | ||||
| +za a
 | ||||
| +EOF
 | ||||
| +) && ok || fail
 | ||||
| +
 | ||||
| +# Skip the rest of the tests if compiled without PCRE
 | ||||
| +echo a |grep -P a >/dev/null || exit $failures
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=171379
 | ||||
| +echo -n "grep -P crashes on whitespace lines: "
 | ||||
| +diff <(${GREP} -P '^\s+$' <(echo "   ")) \
 | ||||
| +<(echo "   ") && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=204255
 | ||||
| +echo -n "-e '' does not work if not a first parameter: "
 | ||||
| +diff <(echo test | grep -e 'HighlightThis' -e '') \
 | ||||
| +     <(echo test | grep -e '' -e 'HighlightThis') && ok || fail
 | ||||
| +
 | ||||
| +U=https://bugzilla.redhat.com/show_bug.cgi?id=324781
 | ||||
| +echo -n "bad handling of line breaks with grep -P #1: "
 | ||||
| +echo -ne "a\na" | ${GREP} -P '[^a]' >/dev/null && fail || ok
 | ||||
| +
 | ||||
| +# This is mostly a check that fix for above doesn't break -P further
 | ||||
| +echo -n "bad handling of line breaks with grep -P #2: "
 | ||||
| +echo -ne "a\na" | ${GREP} -P '[^b].[^b]' >/dev/null && fail || ok
 | ||||
| +
 | ||||
| +exit $failures
 | ||||
| -- 
 | ||||
| 1.5.5.1 | ||||
| 
 | ||||
| @ -1,69 +0,0 @@ | ||||
| From 0c56b664a73764ed01607f47731c8e4607f478d5 Mon Sep 17 00:00:00 2001 | ||||
| From: Lubomir Rintel <lkundrak@v3.sk> | ||||
| Date: Sun, 23 Nov 2008 17:25:57 +0100 | ||||
| Subject: [PATCH] Fix line wrapping in PCRE backend | ||||
| 
 | ||||
| PCRE can't limit the matching to space between newlines (i.e. | ||||
| [^a] will always match newline, see pcreposix(3) for details), | ||||
| therefore we have to split the buffer into lines and match each | ||||
| line in the buffer separately. | ||||
| 
 | ||||
| Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=324781 | ||||
| ---
 | ||||
|  src/search.c |   33 ++++++++++++++++++++++++++++----- | ||||
|  1 files changed, 28 insertions(+), 5 deletions(-) | ||||
| 
 | ||||
| diff --git a/src/search.c b/src/search.c
 | ||||
| index 0b3e0e8..7f5f187 100644
 | ||||
| --- a/src/search.c
 | ||||
| +++ b/src/search.c
 | ||||
| @@ -689,9 +689,32 @@ EXECUTE_FCT(Pexecute)
 | ||||
|       is just for performance improvement in pcre_exec.  */ | ||||
|    int sub[300]; | ||||
|   | ||||
| -  int e = pcre_exec (cre, extra, buf, size,
 | ||||
| -		     start_ptr ? (start_ptr - buf) : 0, 0,
 | ||||
| -		     sub, sizeof sub / sizeof *sub);
 | ||||
| +  char *line_buf = buf;
 | ||||
| +  int line_size = 0;
 | ||||
| +  int e = 0;
 | ||||
| +
 | ||||
| +  /* PCRE can't limit the matching to space between newlines (i.e.
 | ||||
| +     [^a] will always match newline, see pcreposix(3) for details),
 | ||||
| +     therefore we have to match each line in the buffer separately */
 | ||||
| +  do {
 | ||||
| +    /* We're not at the end of buffer or end of line, get another char */
 | ||||
| +    if (line_buf + line_size < buf + size && line_buf[line_size++] != eolbyte) {
 | ||||
| +      continue;
 | ||||
| +    }
 | ||||
| +
 | ||||
| +    /* Match the part of buffer that constitutes a line */
 | ||||
| +    e = pcre_exec (cre, extra, line_buf, line_size - 1,
 | ||||
| +		   start_ptr ? (start_ptr - buf) : 0, 0,
 | ||||
| +		   sub, sizeof sub / sizeof *sub);
 | ||||
| +
 | ||||
| +    /* Don't try other lines if this one matched or returned an error */
 | ||||
| +    if (e != PCRE_ERROR_NOMATCH)
 | ||||
| +      break;
 | ||||
| +
 | ||||
| +    /* Wrap up */
 | ||||
| +    line_buf += line_size;
 | ||||
| +    line_size = 0;
 | ||||
| +  } while (line_buf < buf + size);
 | ||||
|   | ||||
|    if (e <= 0) | ||||
|      { | ||||
| @@ -710,8 +733,8 @@ EXECUTE_FCT(Pexecute)
 | ||||
|    else | ||||
|      { | ||||
|        /* Narrow down to the line we've found.  */ | ||||
| -      char const *beg = buf + sub[0];
 | ||||
| -      char const *end = buf + sub[1];
 | ||||
| +      char const *beg = line_buf + sub[0];
 | ||||
| +      char const *end = line_buf + sub[1];
 | ||||
|        char const *buflim = buf + size; | ||||
|        char eol = eolbyte; | ||||
|        if (!start_ptr) | ||||
| -- 
 | ||||
| 1.5.5.1 | ||||
| 
 | ||||
| @ -1,68 +0,0 @@ | ||||
| --- ./src/search.c.old	2010-02-12 14:29:01.000000000 +0100
 | ||||
| +++ ./src/search.c	2010-02-12 14:29:18.207410166 +0100
 | ||||
| @@ -278,6 +278,8 @@
 | ||||
|    char eol = eolbyte; | ||||
|    int backref, start, len, best_len; | ||||
|    struct kwsmatch kwsm; | ||||
| +  static int use_dfa;
 | ||||
| +  static int use_dfa_checked = 0;
 | ||||
|    size_t i, ret_val; | ||||
|  #ifdef MBS_SUPPORT | ||||
|    int mb_cur_max = MB_CUR_MAX; | ||||
| @@ -285,6 +287,26 @@
 | ||||
|    memset (&mbs, '\0', sizeof (mbstate_t)); | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|   | ||||
| +  if (!use_dfa_checked)
 | ||||
| +    {
 | ||||
| +      char *grep_use_dfa = getenv ("GREP_USE_DFA");
 | ||||
| +      if (!grep_use_dfa)
 | ||||
| +	{
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +	  /* Turn off DFA when processing multibyte input. */
 | ||||
| +	  use_dfa = (MB_CUR_MAX == 1);
 | ||||
| +#else
 | ||||
| +	  use_dfa = 1;
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +	}
 | ||||
| +      else
 | ||||
| +	{
 | ||||
| +	  use_dfa = atoi (grep_use_dfa);
 | ||||
| +	}
 | ||||
| +
 | ||||
| +      use_dfa_checked = 1;
 | ||||
| +    }
 | ||||
| +
 | ||||
|    buflim = buf + size; | ||||
|   | ||||
|    for (beg = end = buf; end < buflim; beg = end) | ||||
| @@ -353,7 +375,8 @@
 | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|  		  (kwsm.index < kwset_exact_matches)) | ||||
|  		goto success; | ||||
| -	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
 | ||||
| +	      if (use_dfa &&
 | ||||
| +		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
 | ||||
|  		continue; | ||||
|  	    } | ||||
|  	  else | ||||
| @@ -362,7 +385,9 @@
 | ||||
|  #ifdef MBS_SUPPORT | ||||
|  	      size_t bytes_left = 0; | ||||
|  #endif /* MBS_SUPPORT */ | ||||
| -	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
 | ||||
| +	      size_t offset = 0;
 | ||||
| +	      if (use_dfa)
 | ||||
| +		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
 | ||||
|  	      if (offset == (size_t) -1) | ||||
|  		break; | ||||
|  	      /* Narrow down to the line we've found. */ | ||||
| @@ -404,7 +429,7 @@
 | ||||
|  		--beg; | ||||
|  	    } | ||||
|  	  /* Successful, no backreferences encountered! */ | ||||
| -	  if (!backref)
 | ||||
| +	  if (use_dfa && !backref)
 | ||||
|  	    goto success; | ||||
|  	} | ||||
|        else | ||||
| @ -1,14 +0,0 @@ | ||||
| From latin1 to utf8 | ||||
| 
 | ||||
| diff -up grep-2.5.4/AUTHORS.utf8 grep-2.5.4/AUTHORS
 | ||||
| --- grep-2.5.1a/AUTHORS.utf8	2008-10-01 18:42:53.000000000 +0200
 | ||||
| +++ grep-2.5.1a/AUTHORS	2008-10-01 18:43:12.000000000 +0200
 | ||||
| @@ -40,7 +40,7 @@ send me email.
 | ||||
|   | ||||
|  Alain Magloire maintained GNU grep until version 2.5e. | ||||
|   | ||||
| -Bernhard "Bero" Rosenkränzer <bero@arklinux.org> maintained GNU grep until
 | ||||
| +Bernhard "Bero" Rosenkränzer <bero@arklinux.org> maintained GNU grep until
 | ||||
|  version 2.5.1, ie. from Sep 2001 till 2003. | ||||
|   | ||||
|  Stepan Kasal <kasal@ucw.cz> maintained GNU grep since Feb 2004. | ||||
| @ -1,117 +0,0 @@ | ||||
| --- a/src/search.c	2007-10-01 14:47:55.000000000 +0200
 | ||||
| +++ b/src/search.c	2007-09-30 23:38:45.000000000 +0200
 | ||||
| @@ -282,6 +282,7 @@
 | ||||
|    static int use_dfa_checked = 0; | ||||
|    size_t i, ret_val; | ||||
|  #ifdef MBS_SUPPORT | ||||
| +  const char *last_char = NULL;
 | ||||
|    int mb_cur_max = MB_CUR_MAX; | ||||
|    mbstate_t mbs; | ||||
|    memset (&mbs, '\0', sizeof (mbstate_t)); | ||||
| @@ -338,6 +341,8 @@
 | ||||
|  		  while (bytes_left) | ||||
|  		    { | ||||
|  		      size_t mlen = mbrlen (beg, bytes_left, &mbs); | ||||
| +
 | ||||
| +		      last_char = beg;
 | ||||
|  		      if (mlen == (size_t) -1 || mlen == 0) | ||||
|  			{ | ||||
|  			  /* Incomplete character: treat as single-byte. */ | ||||
| @@ -398,6 +403,8 @@
 | ||||
|  		  while (bytes_left) | ||||
|  		    { | ||||
|  		      size_t mlen = mbrlen (beg, bytes_left, &mbs); | ||||
| +
 | ||||
| +		      last_char = beg;
 | ||||
|  		      if (mlen == (size_t) -1 || mlen == 0) | ||||
|  			{ | ||||
|  			  /* Incomplete character: treat as single-byte. */ | ||||
| @@ -475,10 +483,84 @@
 | ||||
|  	      if (match_words) | ||||
|  		while (match <= best_match) | ||||
|  		  { | ||||
| +		    int lword_match = 0;
 | ||||
| +		    if (match == buf)
 | ||||
| +		      lword_match = 1;
 | ||||
| +		    else
 | ||||
| +		      {
 | ||||
| +			assert (start > 0);
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +			if (mb_cur_max > 1)
 | ||||
| +			  {
 | ||||
| +			    const char *s;
 | ||||
| +			    int mr;
 | ||||
| +			    wchar_t pwc;
 | ||||
| +			    if (using_utf8)
 | ||||
| +			      {
 | ||||
| +				s = match - 1;
 | ||||
| +				while (s > buf
 | ||||
| +				       && (unsigned char) *s >= 0x80
 | ||||
| +				       && (unsigned char) *s <= 0xbf)
 | ||||
| +				  --s;
 | ||||
| +			      }
 | ||||
| +			    else
 | ||||
| +			      s = last_char;
 | ||||
| +			    mr = mbtowc (&pwc, s, match - s);
 | ||||
| +			    if (mr <= 0)
 | ||||
| +			      {
 | ||||
| +				memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +				lword_match = 1;
 | ||||
| +			      }
 | ||||
| +			    else if (!(iswalnum (pwc) || pwc == L'_')
 | ||||
| +				     && mr == (int) (match - s))
 | ||||
| +			      lword_match = 1;
 | ||||
| +			  }
 | ||||
| +			else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +			if (!WCHAR ((unsigned char) match[-1]))
 | ||||
| +			  lword_match = 1;
 | ||||
| +		      }
 | ||||
| +
 | ||||
| +		    if (lword_match)
 | ||||
| +		      {
 | ||||
| +			int rword_match = 0;
 | ||||
| +			if (start + len == end - beg - 1)
 | ||||
| +			  rword_match = 1;
 | ||||
| +			else
 | ||||
| +			  {
 | ||||
| +#ifdef MBS_SUPPORT
 | ||||
| +			    if (mb_cur_max > 1)
 | ||||
| +			      {
 | ||||
| +				wchar_t nwc;
 | ||||
| +				int mr;
 | ||||
| +
 | ||||
| +				mr = mbtowc (&nwc, buf + start + len,
 | ||||
| +					     end - buf - start - len - 1);
 | ||||
| +				if (mr <= 0)
 | ||||
| +				  {
 | ||||
| +				    memset (&mbs, '\0', sizeof (mbstate_t));
 | ||||
| +				    rword_match = 1;
 | ||||
| +				  }
 | ||||
| +				else if (!iswalnum (nwc) && nwc != L'_')
 | ||||
| +				  rword_match = 1;
 | ||||
| +			      }
 | ||||
| +			    else
 | ||||
| +#endif /* MBS_SUPPORT */
 | ||||
| +			    if (!WCHAR ((unsigned char) match[len]))
 | ||||
| +			      rword_match = 1;
 | ||||
| +			  }
 | ||||
| +
 | ||||
| +			if (rword_match)
 | ||||
| +			  {
 | ||||
| +			    if (!start_ptr)
 | ||||
| +			      /* Returns the whole line. */
 | ||||
| +			      goto success;
 | ||||
| +			    else
 | ||||
| +			      {
 | ||||
| +				goto assess_pattern_match;
 | ||||
| +			      }
 | ||||
| +			  }
 | ||||
| +		      }
 | ||||
| -		    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
 | ||||
| -			&& (len == end - beg - 1
 | ||||
| -			    || !WCHAR ((unsigned char) match[len])))
 | ||||
| -		      goto assess_pattern_match;
 | ||||
|  		    if (len > 0) | ||||
|  		      { | ||||
|  			/* Try a shorter length anchored at the same place. */ | ||||
							
								
								
									
										20
									
								
								grep-2.6-sigsegv.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								grep-2.6-sigsegv.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | ||||
| --- a/src/dfa.c	2010-03-22 17:22:51.000000000 +0100
 | ||||
| +++ b/src/dfa.c	2010-03-23 16:55:43.974271709 +0100
 | ||||
| @@ -243,7 +243,7 @@
 | ||||
|     For MB_CUR_MAX > 1, one or both of the two cases may not be set, | ||||
|     so the resulting charset may only be used as an optimization.  */ | ||||
|  static void | ||||
| -setbit_case_fold (unsigned int b, charclass c)
 | ||||
| +setbit_case_fold (wint_t b, charclass c)
 | ||||
|  { | ||||
|    if (case_fold) | ||||
|      { | ||||
| @@ -691,7 +691,7 @@
 | ||||
|  	  continue; | ||||
|  	} | ||||
|   | ||||
| -      setbit_case_fold (c, ccl);
 | ||||
| +      setbit_case_fold (wc, ccl);
 | ||||
|  #ifdef MBS_SUPPORT | ||||
|        /* Build normal characters.  */ | ||||
|        if (MB_CUR_MAX > 1) | ||||
| @ -1,12 +0,0 @@ | ||||
| diff -up grep-2.5.3/src/search.c.bz479152 grep-2.5.3/src/search.c
 | ||||
| --- grep-2.5.3/src/search.c.bz479152	2009-01-08 09:57:12.000000000 +0100
 | ||||
| +++ grep-2.5.3/src/search.c	2009-01-08 09:58:57.000000000 +0100
 | ||||
| @@ -757,7 +757,7 @@ EXECUTE_FCT(Fexecute)
 | ||||
|    const char *last_char = NULL; | ||||
|  #endif /* MBS_SUPPORT */ | ||||
|   | ||||
| -  for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
 | ||||
| +  for (beg = start_ptr ? start_ptr : buf; beg < buf + size; beg++)
 | ||||
|      { | ||||
|        size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); | ||||
|        if (offset == (size_t) -1) | ||||
							
								
								
									
										34
									
								
								grep.spec
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								grep.spec
									
									
									
									
									
								
							| @ -2,21 +2,12 @@ | ||||
| 
 | ||||
| Summary: Pattern matching utilities | ||||
| Name: grep | ||||
| Version: 2.5.4 | ||||
| Release: 2%{?dist} | ||||
| Version: 2.6 | ||||
| Release: 1%{?dist} | ||||
| License: GPLv3+ | ||||
| Group: Applications/Text | ||||
| Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.bz2 | ||||
| Patch0: grep-2.5.3-fedora-tests.patch | ||||
| Patch2: grep-2.5.3-pcrewrap.patch | ||||
| Patch3: grep-2.5.3-case.patch | ||||
| Patch4: grep-2.5.3-egf-speedup.patch | ||||
| # bug #460641 (a.k.a. 479152) | ||||
| Patch40: grep-bz460641.patch | ||||
| Patch5: grep-2.5.4-utf8.patch | ||||
| Patch6: grep-2.5.4-dfa-optional.patch | ||||
| # fix -w switch behaviour broken by dfa-optional.patch | ||||
| Patch13: grep-2.5.4-w.patch | ||||
| Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz | ||||
| Patch0: grep-2.6-sigsegv.patch | ||||
| URL: http://www.gnu.org/software/grep/ | ||||
| Requires(post): /sbin/install-info | ||||
| Requires(preun): /sbin/install-info | ||||
| @ -33,14 +24,7 @@ GNU grep is needed by many scripts, so it shall be installed on every system. | ||||
| 
 | ||||
| %prep | ||||
| %setup -q | ||||
| %patch0 -p1 -b .fedora-tests | ||||
| %patch2 -p1 -b .pcrewrap | ||||
| %patch3 -p1 -b .case | ||||
| %patch4 -p1 -b .egf-speedup | ||||
| %patch40 -p1 -b .bz460641 | ||||
| %patch5 -p1 -b .utf8 | ||||
| %patch6 -p1 -b .dfa-optional | ||||
| %patch13 -p1 -b .w | ||||
| %patch0 -p1 -b .sigsegv | ||||
| 
 | ||||
| %build | ||||
| %configure --without-included-regex CPPFLAGS="-I%{_includedir}/pcre" | ||||
| @ -62,11 +46,9 @@ make check ||: | ||||
| rm -rf ${RPM_BUILD_ROOT} | ||||
| 
 | ||||
| %post | ||||
| #exec >/dev/null 2>&1 | ||||
| /sbin/install-info --quiet --info-dir=%{_infodir} %{_infodir}/grep.info.gz || : | ||||
| 
 | ||||
| %preun | ||||
| #exec >/dev/null 2>&1 | ||||
| if [ $1 = 0 ]; then | ||||
| 	/sbin/install-info --quiet --info-dir=%{_infodir} --delete %{_infodir}/grep.info.gz || : | ||||
| fi | ||||
| @ -80,6 +62,12 @@ fi | ||||
| %{_mandir}/*/* | ||||
| 
 | ||||
| %changelog | ||||
| * Tue Mar 23 2010 Jaroslav Škarvada <jskarvad@redhat.com> - 2.6-1 | ||||
| - New version: grep-2.6 | ||||
| - Added sigsegv patch (after release patch from upstream) | ||||
| - Dropped obsoleted patches: fedora-tests, pcrewrap, case, egf-speedup, | ||||
|   bz460641, utf8, dfa-optional, w | ||||
| 
 | ||||
| * Fri Mar 05 2010 Jaroslav Škarvada <jskarvad@redhat.com> - 2.5.4-2 | ||||
| - Added w patch to fix -w switch behaviour broken by dfa-optional patch | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user