Fix pcregrep loop when \K is used in a lookbehind assertion
This commit is contained in:
parent
c035c85cd5
commit
9bd723e9c9
@ -0,0 +1,211 @@
|
|||||||
|
From 8f907e8731b5eb4aca840a1979e76eed1b5c8175 Mon Sep 17 00:00:00 2001
|
||||||
|
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
||||||
|
Date: Tue, 7 Apr 2015 15:52:11 +0000
|
||||||
|
Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion.
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Upstream commit ported to 8.36:
|
||||||
|
|
||||||
|
commit b4332d7dd831b3547b3f541495de4a79554e538e
|
||||||
|
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
||||||
|
Date: Tue Apr 7 15:52:11 2015 +0000
|
||||||
|
|
||||||
|
Fix pcregrep loop when \K is used in a lookbehind assertion.
|
||||||
|
|
||||||
|
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
||||||
|
|
||||||
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||||
|
---
|
||||||
|
RunGrepTest | 5 +++
|
||||||
|
pcregrep.c | 109 ++++++++++++++++++++++++++++++++++------------------
|
||||||
|
testdata/grepoutput | 8 ++++
|
||||||
|
3 files changed, 85 insertions(+), 37 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/RunGrepTest b/RunGrepTest
|
||||||
|
index f1b0348..766278b 100755
|
||||||
|
--- a/RunGrepTest
|
||||||
|
+++ b/RunGrepTest
|
||||||
|
@@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes
|
||||||
|
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
+echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
|
||||||
|
+echo "a" >testtemp1grep
|
||||||
|
+echo "aaaaa" >>testtemp1grep
|
||||||
|
+(cd $srcdir; $valgrind $pcregrep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
|
||||||
|
+echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
# Now compare the results.
|
||||||
|
|
||||||
|
diff --git a/pcregrep.c b/pcregrep.c
|
||||||
|
index 4f7fa38..b1af129 100644
|
||||||
|
--- a/pcregrep.c
|
||||||
|
+++ b/pcregrep.c
|
||||||
|
@@ -1582,11 +1582,14 @@ while (ptr < endptr)
|
||||||
|
int endlinelength;
|
||||||
|
int mrc = 0;
|
||||||
|
int startoffset = 0;
|
||||||
|
+ int prevoffsets[2];
|
||||||
|
unsigned int options = 0;
|
||||||
|
BOOL match;
|
||||||
|
char *matchptr = ptr;
|
||||||
|
char *t = ptr;
|
||||||
|
size_t length, linelength;
|
||||||
|
+
|
||||||
|
+ prevoffsets[0] = prevoffsets[1] = -1;
|
||||||
|
|
||||||
|
/* At this point, ptr is at the start of a line. We need to find the length
|
||||||
|
of the subject string to pass to pcre_exec(). In multiline mode, it is the
|
||||||
|
@@ -1729,55 +1732,86 @@ while (ptr < endptr)
|
||||||
|
{
|
||||||
|
if (!invert)
|
||||||
|
{
|
||||||
|
- if (printname != NULL) fprintf(stdout, "%s:", printname);
|
||||||
|
- if (number) fprintf(stdout, "%d:", linenumber);
|
||||||
|
-
|
||||||
|
- /* Handle --line-offsets */
|
||||||
|
-
|
||||||
|
- if (line_offsets)
|
||||||
|
- fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
|
||||||
|
- offsets[1] - offsets[0]);
|
||||||
|
-
|
||||||
|
- /* Handle --file-offsets */
|
||||||
|
-
|
||||||
|
- else if (file_offsets)
|
||||||
|
- fprintf(stdout, "%d,%d\n",
|
||||||
|
- (int)(filepos + matchptr + offsets[0] - ptr),
|
||||||
|
- offsets[1] - offsets[0]);
|
||||||
|
-
|
||||||
|
- /* Handle --only-matching, which may occur many times */
|
||||||
|
-
|
||||||
|
- else
|
||||||
|
+ int oldstartoffset = startoffset;
|
||||||
|
+
|
||||||
|
+ /* It is possible, when a lookbehind assertion contains \K, for the
|
||||||
|
+ same string to be found again. The code below advances startoffset, but
|
||||||
|
+ until it is past the "bumpalong" offset that gave the match, the same
|
||||||
|
+ substring will be returned. The PCRE1 library does not return the
|
||||||
|
+ bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
|
||||||
|
+ does this better.) */
|
||||||
|
+
|
||||||
|
+ if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
|
||||||
|
{
|
||||||
|
- BOOL printed = FALSE;
|
||||||
|
- omstr *om;
|
||||||
|
-
|
||||||
|
- for (om = only_matching; om != NULL; om = om->next)
|
||||||
|
+ prevoffsets[0] = offsets[0];
|
||||||
|
+ prevoffsets[1] = offsets[1];
|
||||||
|
+
|
||||||
|
+ if (printname != NULL) fprintf(stdout, "%s:", printname);
|
||||||
|
+ if (number) fprintf(stdout, "%d:", linenumber);
|
||||||
|
+
|
||||||
|
+ /* Handle --line-offsets */
|
||||||
|
+
|
||||||
|
+ if (line_offsets)
|
||||||
|
+ fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
|
||||||
|
+ offsets[1] - offsets[0]);
|
||||||
|
+
|
||||||
|
+ /* Handle --file-offsets */
|
||||||
|
+
|
||||||
|
+ else if (file_offsets)
|
||||||
|
+ fprintf(stdout, "%d,%d\n",
|
||||||
|
+ (int)(filepos + matchptr + offsets[0] - ptr),
|
||||||
|
+ offsets[1] - offsets[0]);
|
||||||
|
+
|
||||||
|
+ /* Handle --only-matching, which may occur many times */
|
||||||
|
+
|
||||||
|
+ else
|
||||||
|
{
|
||||||
|
- int n = om->groupnum;
|
||||||
|
- if (n < mrc)
|
||||||
|
+ BOOL printed = FALSE;
|
||||||
|
+ omstr *om;
|
||||||
|
+
|
||||||
|
+ for (om = only_matching; om != NULL; om = om->next)
|
||||||
|
{
|
||||||
|
- int plen = offsets[2*n + 1] - offsets[2*n];
|
||||||
|
- if (plen > 0)
|
||||||
|
+ int n = om->groupnum;
|
||||||
|
+ if (n < mrc)
|
||||||
|
{
|
||||||
|
- if (printed) fprintf(stdout, "%s", om_separator);
|
||||||
|
- if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||||
|
- FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
|
||||||
|
- if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
|
||||||
|
- printed = TRUE;
|
||||||
|
+ int plen = offsets[2*n + 1] - offsets[2*n];
|
||||||
|
+ if (plen > 0)
|
||||||
|
+ {
|
||||||
|
+ if (printed) fprintf(stdout, "%s", om_separator);
|
||||||
|
+ if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||||
|
+ FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
|
||||||
|
+ if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
|
||||||
|
+ printed = TRUE;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ if (printed || printname != NULL || number) fprintf(stdout, "\n");
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- if (printed || printname != NULL || number) fprintf(stdout, "\n");
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Prepare to repeat to find the next match */
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Prepare to repeat to find the next match. If the pattern contained
|
||||||
|
+ a lookbehind that included \K, it is possible that the end of the match
|
||||||
|
+ might be at or before the actual starting offset we have just used. We
|
||||||
|
+ need to start one character further on. Unfortunately, for unanchored
|
||||||
|
+ patterns, the actual start offset can be greater than the one that was
|
||||||
|
+ set as a result of "bumpalong". PCRE1 does not return the actual start
|
||||||
|
+ offset, so we have to check against the original start offset. This may
|
||||||
|
+ lead to duplicates - so we need the fudge above to avoid printing them.
|
||||||
|
+ (PCRE2 does this better.) */
|
||||||
|
|
||||||
|
match = FALSE;
|
||||||
|
if (line_buffered) fflush(stdout);
|
||||||
|
rc = 0; /* Had some success */
|
||||||
|
startoffset = offsets[1]; /* Restart after the match */
|
||||||
|
+ if (startoffset <= oldstartoffset)
|
||||||
|
+ {
|
||||||
|
+ if ((size_t)startoffset >= length)
|
||||||
|
+ goto END_ONE_MATCH; /* We were at the end */
|
||||||
|
+ startoffset = oldstartoffset + 1;
|
||||||
|
+ if (utf8)
|
||||||
|
+ while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
|
||||||
|
+ }
|
||||||
|
goto ONLY_MATCHING_RESTART;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -1974,6 +2008,7 @@ while (ptr < endptr)
|
||||||
|
/* Advance to after the newline and increment the line number. The file
|
||||||
|
offset to the current line is maintained in filepos. */
|
||||||
|
|
||||||
|
+ END_ONE_MATCH:
|
||||||
|
ptr += linelength + endlinelength;
|
||||||
|
filepos += (int)(linelength + endlinelength);
|
||||||
|
linenumber++;
|
||||||
|
diff --git a/testdata/grepoutput b/testdata/grepoutput
|
||||||
|
index 9bf9d9d..4d61752 100644
|
||||||
|
--- a/testdata/grepoutput
|
||||||
|
+++ b/testdata/grepoutput
|
||||||
|
@@ -743,3 +743,11 @@ RC=0
|
||||||
|
---------------------------- Test 106 -----------------------------
|
||||||
|
a
|
||||||
|
RC=0
|
||||||
|
+---------------------------- Test 107 -----------------------------
|
||||||
|
+1:0,1
|
||||||
|
+2:0,1
|
||||||
|
+2:1,1
|
||||||
|
+2:2,1
|
||||||
|
+2:3,1
|
||||||
|
+2:4,1
|
||||||
|
+RC=0
|
||||||
|
--
|
||||||
|
2.1.0
|
||||||
|
|
@ -34,6 +34,9 @@ Patch7: pcre-8.36-Fix-comment-between-subroutine-call-and-quantifier-b.patch
|
|||||||
# Fix compilation of mutual recursion inside a lookbehind assertion,
|
# Fix compilation of mutual recursion inside a lookbehind assertion,
|
||||||
# bug #1210417, in upstream after 8.36
|
# bug #1210417, in upstream after 8.36
|
||||||
Patch8: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
|
Patch8: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
|
||||||
|
# Fix pcregrep loop when \K is used in a lookbehind assertion, bug #1210423,
|
||||||
|
# in upstream after 8.36
|
||||||
|
Patch9: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
|
||||||
BuildRequires: readline-devel
|
BuildRequires: readline-devel
|
||||||
# New libtool to get rid of rpath
|
# New libtool to get rid of rpath
|
||||||
BuildRequires: autoconf, automake, libtool
|
BuildRequires: autoconf, automake, libtool
|
||||||
@ -82,6 +85,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
|
|||||||
%patch6 -p1 -b .mutual_recursion
|
%patch6 -p1 -b .mutual_recursion
|
||||||
%patch7 -p1 -b .parenthesized_comment
|
%patch7 -p1 -b .parenthesized_comment
|
||||||
%patch8 -p1 -b .mutual_recursion_in_assertion
|
%patch8 -p1 -b .mutual_recursion_in_assertion
|
||||||
|
%patch9 -p1 -b .pcregrep_k_loop
|
||||||
# Because of rpath patch
|
# Because of rpath patch
|
||||||
libtoolize --copy --force && autoreconf -vif
|
libtoolize --copy --force && autoreconf -vif
|
||||||
# One contributor's name is non-UTF-8
|
# One contributor's name is non-UTF-8
|
||||||
@ -160,6 +164,7 @@ make %{?_smp_mflags} check VERBOSE=yes
|
|||||||
- Fix compilation of a parenthesized comment (bug #1210410)
|
- Fix compilation of a parenthesized comment (bug #1210410)
|
||||||
- Fix compilation of mutual recursion inside a lookbehind assertion
|
- Fix compilation of mutual recursion inside a lookbehind assertion
|
||||||
(bug #1210417)
|
(bug #1210417)
|
||||||
|
- Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423)
|
||||||
|
|
||||||
* Thu Mar 26 2015 Petr Pisar <ppisar@redhat.com> - 8.36-4
|
* Thu Mar 26 2015 Petr Pisar <ppisar@redhat.com> - 8.36-4
|
||||||
- Fix computing size of JIT read-only data (bug #1206131)
|
- Fix computing size of JIT read-only data (bug #1206131)
|
||||||
|
Loading…
Reference in New Issue
Block a user