Fix pcregrep loop when \K is used in a lookbehind assertion
This commit is contained in:
		
							parent
							
								
									c035c85cd5
								
							
						
					
					
						commit
						9bd723e9c9
					
				| @ -0,0 +1,211 @@ | |||||||
|  | From 8f907e8731b5eb4aca840a1979e76eed1b5c8175 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | ||||||
|  | Date: Tue, 7 Apr 2015 15:52:11 +0000 | ||||||
|  | Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion. | ||||||
|  | MIME-Version: 1.0 | ||||||
|  | Content-Type: text/plain; charset=UTF-8 | ||||||
|  | Content-Transfer-Encoding: 8bit | ||||||
|  | 
 | ||||||
|  | Upstream commit ported to 8.36: | ||||||
|  | 
 | ||||||
|  | commit b4332d7dd831b3547b3f541495de4a79554e538e | ||||||
|  | Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | ||||||
|  | Date:   Tue Apr 7 15:52:11 2015 +0000 | ||||||
|  | 
 | ||||||
|  |     Fix pcregrep loop when \K is used in a lookbehind assertion. | ||||||
|  | 
 | ||||||
|  |     git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15 | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Petr Písař <ppisar@redhat.com> | ||||||
|  | ---
 | ||||||
|  |  RunGrepTest         |   5 +++ | ||||||
|  |  pcregrep.c          | 109 ++++++++++++++++++++++++++++++++++------------------ | ||||||
|  |  testdata/grepoutput |   8 ++++ | ||||||
|  |  3 files changed, 85 insertions(+), 37 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/RunGrepTest b/RunGrepTest
 | ||||||
|  | index f1b0348..766278b 100755
 | ||||||
|  | --- a/RunGrepTest
 | ||||||
|  | +++ b/RunGrepTest
 | ||||||
|  | @@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes
 | ||||||
|  |  (cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1 | ||||||
|  |  echo "RC=$?" >>testtrygrep | ||||||
|  |   | ||||||
|  | +echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
 | ||||||
|  | +echo "a" >testtemp1grep
 | ||||||
|  | +echo "aaaaa" >>testtemp1grep
 | ||||||
|  | +(cd $srcdir; $valgrind $pcregrep  --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
 | ||||||
|  | +echo "RC=$?" >>testtrygrep
 | ||||||
|  |   | ||||||
|  |  # Now compare the results. | ||||||
|  |   | ||||||
|  | diff --git a/pcregrep.c b/pcregrep.c
 | ||||||
|  | index 4f7fa38..b1af129 100644
 | ||||||
|  | --- a/pcregrep.c
 | ||||||
|  | +++ b/pcregrep.c
 | ||||||
|  | @@ -1582,11 +1582,14 @@ while (ptr < endptr)
 | ||||||
|  |    int endlinelength; | ||||||
|  |    int mrc = 0; | ||||||
|  |    int startoffset = 0; | ||||||
|  | +  int prevoffsets[2]; 
 | ||||||
|  |    unsigned int options = 0; | ||||||
|  |    BOOL match; | ||||||
|  |    char *matchptr = ptr; | ||||||
|  |    char *t = ptr; | ||||||
|  |    size_t length, linelength; | ||||||
|  | +  
 | ||||||
|  | +  prevoffsets[0] = prevoffsets[1] = -1; 
 | ||||||
|  |   | ||||||
|  |    /* At this point, ptr is at the start of a line. We need to find the length | ||||||
|  |    of the subject string to pass to pcre_exec(). In multiline mode, it is the | ||||||
|  | @@ -1729,55 +1732,86 @@ while (ptr < endptr)
 | ||||||
|  |        { | ||||||
|  |        if (!invert) | ||||||
|  |          { | ||||||
|  | -        if (printname != NULL) fprintf(stdout, "%s:", printname);
 | ||||||
|  | -        if (number) fprintf(stdout, "%d:", linenumber);
 | ||||||
|  | -
 | ||||||
|  | -        /* Handle --line-offsets */
 | ||||||
|  | -
 | ||||||
|  | -        if (line_offsets)
 | ||||||
|  | -          fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
 | ||||||
|  | -            offsets[1] - offsets[0]);
 | ||||||
|  | -
 | ||||||
|  | -        /* Handle --file-offsets */
 | ||||||
|  | -
 | ||||||
|  | -        else if (file_offsets)
 | ||||||
|  | -          fprintf(stdout, "%d,%d\n",
 | ||||||
|  | -            (int)(filepos + matchptr + offsets[0] - ptr),
 | ||||||
|  | -            offsets[1] - offsets[0]);
 | ||||||
|  | -
 | ||||||
|  | -        /* Handle --only-matching, which may occur many times */
 | ||||||
|  | -
 | ||||||
|  | -        else
 | ||||||
|  | +        int oldstartoffset = startoffset;
 | ||||||
|  | +        
 | ||||||
|  | +        /* It is possible, when a lookbehind assertion contains \K, for the 
 | ||||||
|  | +        same string to be found again. The code below advances startoffset, but 
 | ||||||
|  | +        until it is past the "bumpalong" offset that gave the match, the same
 | ||||||
|  | +        substring will be returned. The PCRE1 library does not return the
 | ||||||
|  | +        bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
 | ||||||
|  | +        does this better.) */
 | ||||||
|  | +         
 | ||||||
|  | +        if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
 | ||||||
|  |            { | ||||||
|  | -          BOOL printed = FALSE;
 | ||||||
|  | -          omstr *om;
 | ||||||
|  | -
 | ||||||
|  | -          for (om = only_matching; om != NULL; om = om->next)
 | ||||||
|  | +          prevoffsets[0] = offsets[0];
 | ||||||
|  | +          prevoffsets[1] = offsets[1]; 
 | ||||||
|  | +            
 | ||||||
|  | +          if (printname != NULL) fprintf(stdout, "%s:", printname);
 | ||||||
|  | +          if (number) fprintf(stdout, "%d:", linenumber);
 | ||||||
|  | +          
 | ||||||
|  | +          /* Handle --line-offsets */
 | ||||||
|  | +          
 | ||||||
|  | +          if (line_offsets)
 | ||||||
|  | +            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
 | ||||||
|  | +              offsets[1] - offsets[0]);
 | ||||||
|  | +          
 | ||||||
|  | +          /* Handle --file-offsets */
 | ||||||
|  | +          
 | ||||||
|  | +          else if (file_offsets)
 | ||||||
|  | +            fprintf(stdout, "%d,%d\n",
 | ||||||
|  | +              (int)(filepos + matchptr + offsets[0] - ptr),
 | ||||||
|  | +              offsets[1] - offsets[0]);
 | ||||||
|  | +          
 | ||||||
|  | +          /* Handle --only-matching, which may occur many times */
 | ||||||
|  | +          
 | ||||||
|  | +          else
 | ||||||
|  |              { | ||||||
|  | -            int n = om->groupnum;
 | ||||||
|  | -            if (n < mrc)
 | ||||||
|  | +            BOOL printed = FALSE;
 | ||||||
|  | +            omstr *om;
 | ||||||
|  | +          
 | ||||||
|  | +            for (om = only_matching; om != NULL; om = om->next)
 | ||||||
|  |                { | ||||||
|  | -              int plen = offsets[2*n + 1] - offsets[2*n];
 | ||||||
|  | -              if (plen > 0)
 | ||||||
|  | +              int n = om->groupnum;
 | ||||||
|  | +              if (n < mrc)
 | ||||||
|  |                  { | ||||||
|  | -                if (printed) fprintf(stdout, "%s", om_separator);
 | ||||||
|  | -                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 | ||||||
|  | -                FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
 | ||||||
|  | -                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
 | ||||||
|  | -                printed = TRUE;
 | ||||||
|  | +                int plen = offsets[2*n + 1] - offsets[2*n];
 | ||||||
|  | +                if (plen > 0)
 | ||||||
|  | +                  {
 | ||||||
|  | +                  if (printed) fprintf(stdout, "%s", om_separator);
 | ||||||
|  | +                  if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 | ||||||
|  | +                  FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
 | ||||||
|  | +                  if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
 | ||||||
|  | +                  printed = TRUE;
 | ||||||
|  | +                  }
 | ||||||
|  |                  } | ||||||
|  |                } | ||||||
|  | +          
 | ||||||
|  | +            if (printed || printname != NULL || number) fprintf(stdout, "\n");
 | ||||||
|  |              } | ||||||
|  | -
 | ||||||
|  | -          if (printed || printname != NULL || number) fprintf(stdout, "\n");
 | ||||||
|  | -          }
 | ||||||
|  | -
 | ||||||
|  | -        /* Prepare to repeat to find the next match */
 | ||||||
|  | +          }   
 | ||||||
|  | +
 | ||||||
|  | +        /* Prepare to repeat to find the next match. If the pattern contained 
 | ||||||
|  | +        a lookbehind that included \K, it is possible that the end of the match 
 | ||||||
|  | +        might be at or before the actual starting offset we have just used. We 
 | ||||||
|  | +        need to start one character further on. Unfortunately, for unanchored 
 | ||||||
|  | +        patterns, the actual start offset can be greater than the one that was 
 | ||||||
|  | +        set as a result of "bumpalong". PCRE1 does not return the actual start 
 | ||||||
|  | +        offset, so we have to check against the original start offset. This may 
 | ||||||
|  | +        lead to duplicates - so we need the fudge above to avoid printing them. 
 | ||||||
|  | +        (PCRE2 does this better.) */
 | ||||||
|  |   | ||||||
|  |          match = FALSE; | ||||||
|  |          if (line_buffered) fflush(stdout); | ||||||
|  |          rc = 0;                      /* Had some success */ | ||||||
|  |          startoffset = offsets[1];    /* Restart after the match */ | ||||||
|  | +        if (startoffset <= oldstartoffset)
 | ||||||
|  | +          {
 | ||||||
|  | +          if ((size_t)startoffset >= length) 
 | ||||||
|  | +            goto END_ONE_MATCH;              /* We were at the end */
 | ||||||
|  | +          startoffset = oldstartoffset + 1;
 | ||||||
|  | +          if (utf8)
 | ||||||
|  | +            while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;    
 | ||||||
|  | +          }   
 | ||||||
|  |          goto ONLY_MATCHING_RESTART; | ||||||
|  |          } | ||||||
|  |        } | ||||||
|  | @@ -1974,6 +2008,7 @@ while (ptr < endptr)
 | ||||||
|  |    /* Advance to after the newline and increment the line number. The file | ||||||
|  |    offset to the current line is maintained in filepos. */ | ||||||
|  |   | ||||||
|  | +  END_ONE_MATCH:
 | ||||||
|  |    ptr += linelength + endlinelength; | ||||||
|  |    filepos += (int)(linelength + endlinelength); | ||||||
|  |    linenumber++; | ||||||
|  | diff --git a/testdata/grepoutput b/testdata/grepoutput
 | ||||||
|  | index 9bf9d9d..4d61752 100644
 | ||||||
|  | --- a/testdata/grepoutput
 | ||||||
|  | +++ b/testdata/grepoutput
 | ||||||
|  | @@ -743,3 +743,11 @@ RC=0
 | ||||||
|  |  ---------------------------- Test 106 ----------------------------- | ||||||
|  |  a | ||||||
|  |  RC=0 | ||||||
|  | +---------------------------- Test 107 -----------------------------
 | ||||||
|  | +1:0,1
 | ||||||
|  | +2:0,1
 | ||||||
|  | +2:1,1
 | ||||||
|  | +2:2,1
 | ||||||
|  | +2:3,1
 | ||||||
|  | +2:4,1
 | ||||||
|  | +RC=0
 | ||||||
|  | -- 
 | ||||||
|  | 2.1.0 | ||||||
|  | 
 | ||||||
| @ -34,6 +34,9 @@ Patch7: pcre-8.36-Fix-comment-between-subroutine-call-and-quantifier-b.patch | |||||||
| # Fix compilation of mutual recursion inside a lookbehind assertion, | # Fix compilation of mutual recursion inside a lookbehind assertion, | ||||||
| # bug #1210417, in upstream after 8.36 | # bug #1210417, in upstream after 8.36 | ||||||
| Patch8: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch | Patch8: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch | ||||||
|  | # Fix pcregrep loop when \K is used in a lookbehind assertion, bug #1210423, | ||||||
|  | # in upstream after 8.36 | ||||||
|  | Patch9: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch | ||||||
| BuildRequires: readline-devel | BuildRequires: readline-devel | ||||||
| # New libtool to get rid of rpath | # New libtool to get rid of rpath | ||||||
| BuildRequires: autoconf, automake, libtool | BuildRequires: autoconf, automake, libtool | ||||||
| @ -82,6 +85,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. | |||||||
| %patch6 -p1 -b .mutual_recursion | %patch6 -p1 -b .mutual_recursion | ||||||
| %patch7 -p1 -b .parenthesized_comment | %patch7 -p1 -b .parenthesized_comment | ||||||
| %patch8 -p1 -b .mutual_recursion_in_assertion | %patch8 -p1 -b .mutual_recursion_in_assertion | ||||||
|  | %patch9 -p1 -b .pcregrep_k_loop | ||||||
| # Because of rpath patch | # Because of rpath patch | ||||||
| libtoolize --copy --force && autoreconf -vif | libtoolize --copy --force && autoreconf -vif | ||||||
| # One contributor's name is non-UTF-8 | # One contributor's name is non-UTF-8 | ||||||
| @ -160,6 +164,7 @@ make %{?_smp_mflags} check VERBOSE=yes | |||||||
| - Fix compilation of a parenthesized comment (bug #1210410) | - Fix compilation of a parenthesized comment (bug #1210410) | ||||||
| - Fix compilation of mutual recursion inside a lookbehind assertion | - Fix compilation of mutual recursion inside a lookbehind assertion | ||||||
|   (bug #1210417) |   (bug #1210417) | ||||||
|  | - Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423) | ||||||
| 
 | 
 | ||||||
| * Thu Mar 26 2015 Petr Pisar <ppisar@redhat.com> - 8.36-4 | * Thu Mar 26 2015 Petr Pisar <ppisar@redhat.com> - 8.36-4 | ||||||
| - Fix computing size of JIT read-only data (bug #1206131) | - Fix computing size of JIT read-only data (bug #1206131) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user