From 9bd723e9c98fa03b1f3bf40d8d34aecebf1e3713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Thu, 9 Apr 2015 18:51:34 +0200 Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion --- ...p-when-K-is-used-in-a-lookbehind-ass.patch | 211 ++++++++++++++++++ pcre.spec | 5 + 2 files changed, 216 insertions(+) create mode 100644 pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch diff --git a/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch b/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch new file mode 100644 index 0000000..fd7ef9f --- /dev/null +++ b/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch @@ -0,0 +1,211 @@ +From 8f907e8731b5eb4aca840a1979e76eed1b5c8175 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 7 Apr 2015 15:52:11 +0000 +Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Upstream commit ported to 8.36: + +commit b4332d7dd831b3547b3f541495de4a79554e538e +Author: ph10 +Date: Tue Apr 7 15:52:11 2015 +0000 + + Fix pcregrep loop when \K is used in a lookbehind assertion. + + git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Signed-off-by: Petr Písař +--- + RunGrepTest | 5 +++ + pcregrep.c | 109 ++++++++++++++++++++++++++++++++++------------------ + testdata/grepoutput | 8 ++++ + 3 files changed, 85 insertions(+), 37 deletions(-) + +diff --git a/RunGrepTest b/RunGrepTest +index f1b0348..766278b 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes + (cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1 + echo "RC=$?" 
>>testtrygrep + ++echo "---------------------------- Test 107 -----------------------------" >>testtrygrep ++echo "a" >testtemp1grep ++echo "aaaaa" >>testtemp1grep ++(cd $srcdir; $valgrind $pcregrep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1 ++echo "RC=$?" >>testtrygrep + + # Now compare the results. + +diff --git a/pcregrep.c b/pcregrep.c +index 4f7fa38..b1af129 100644 +--- a/pcregrep.c ++++ b/pcregrep.c +@@ -1582,11 +1582,14 @@ while (ptr < endptr) + int endlinelength; + int mrc = 0; + int startoffset = 0; ++ int prevoffsets[2]; + unsigned int options = 0; + BOOL match; + char *matchptr = ptr; + char *t = ptr; + size_t length, linelength; ++ ++ prevoffsets[0] = prevoffsets[1] = -1; + + /* At this point, ptr is at the start of a line. We need to find the length + of the subject string to pass to pcre_exec(). In multiline mode, it is the +@@ -1729,55 +1732,86 @@ while (ptr < endptr) + { + if (!invert) + { +- if (printname != NULL) fprintf(stdout, "%s:", printname); +- if (number) fprintf(stdout, "%d:", linenumber); +- +- /* Handle --line-offsets */ +- +- if (line_offsets) +- fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), +- offsets[1] - offsets[0]); +- +- /* Handle --file-offsets */ +- +- else if (file_offsets) +- fprintf(stdout, "%d,%d\n", +- (int)(filepos + matchptr + offsets[0] - ptr), +- offsets[1] - offsets[0]); +- +- /* Handle --only-matching, which may occur many times */ +- +- else ++ int oldstartoffset = startoffset; ++ ++ /* It is possible, when a lookbehind assertion contains \K, for the ++ same string to be found again. The code below advances startoffset, but ++ until it is past the "bumpalong" offset that gave the match, the same ++ substring will be returned. The PCRE1 library does not return the ++ bumpalong offset, so all we can do is ignore repeated strings. (PCRE2 ++ does this better.) 
*/ ++ ++ if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1]) + { +- BOOL printed = FALSE; +- omstr *om; +- +- for (om = only_matching; om != NULL; om = om->next) ++ prevoffsets[0] = offsets[0]; ++ prevoffsets[1] = offsets[1]; ++ ++ if (printname != NULL) fprintf(stdout, "%s:", printname); ++ if (number) fprintf(stdout, "%d:", linenumber); ++ ++ /* Handle --line-offsets */ ++ ++ if (line_offsets) ++ fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), ++ offsets[1] - offsets[0]); ++ ++ /* Handle --file-offsets */ ++ ++ else if (file_offsets) ++ fprintf(stdout, "%d,%d\n", ++ (int)(filepos + matchptr + offsets[0] - ptr), ++ offsets[1] - offsets[0]); ++ ++ /* Handle --only-matching, which may occur many times */ ++ ++ else + { +- int n = om->groupnum; +- if (n < mrc) ++ BOOL printed = FALSE; ++ omstr *om; ++ ++ for (om = only_matching; om != NULL; om = om->next) + { +- int plen = offsets[2*n + 1] - offsets[2*n]; +- if (plen > 0) ++ int n = om->groupnum; ++ if (n < mrc) + { +- if (printed) fprintf(stdout, "%s", om_separator); +- if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); +- FWRITE(matchptr + offsets[n*2], 1, plen, stdout); +- if (do_colour) fprintf(stdout, "%c[00m", 0x1b); +- printed = TRUE; ++ int plen = offsets[2*n + 1] - offsets[2*n]; ++ if (plen > 0) ++ { ++ if (printed) fprintf(stdout, "%s", om_separator); ++ if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); ++ FWRITE(matchptr + offsets[n*2], 1, plen, stdout); ++ if (do_colour) fprintf(stdout, "%c[00m", 0x1b); ++ printed = TRUE; ++ } + } + } ++ ++ if (printed || printname != NULL || number) fprintf(stdout, "\n"); + } +- +- if (printed || printname != NULL || number) fprintf(stdout, "\n"); +- } +- +- /* Prepare to repeat to find the next match */ ++ } ++ ++ /* Prepare to repeat to find the next match. 
If the pattern contained ++ a lookbehind that included \K, it is possible that the end of the match ++ might be at or before the actual starting offset we have just used. We ++ need to start one character further on. Unfortunately, for unanchored ++ patterns, the actual start offset can be greater than the one that was ++ set as a result of "bumpalong". PCRE1 does not return the actual start ++ offset, so we have to check against the original start offset. This may ++ lead to duplicates - so we need the fudge above to avoid printing them. ++ (PCRE2 does this better.) */ + + match = FALSE; + if (line_buffered) fflush(stdout); + rc = 0; /* Had some success */ + startoffset = offsets[1]; /* Restart after the match */ ++ if (startoffset <= oldstartoffset) ++ { ++ if ((size_t)startoffset >= length) ++ goto END_ONE_MATCH; /* We were at the end */ ++ startoffset = oldstartoffset + 1; ++ if (utf8) ++ while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; ++ } + goto ONLY_MATCHING_RESTART; + } + } +@@ -1974,6 +2008,7 @@ while (ptr < endptr) + /* Advance to after the newline and increment the line number. The file + offset to the current line is maintained in filepos. 
*/ + ++ END_ONE_MATCH: + ptr += linelength + endlinelength; + filepos += (int)(linelength + endlinelength); + linenumber++; +diff --git a/testdata/grepoutput b/testdata/grepoutput +index 9bf9d9d..4d61752 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -743,3 +743,11 @@ RC=0 + ---------------------------- Test 106 ----------------------------- + a + RC=0 ++---------------------------- Test 107 ----------------------------- ++1:0,1 ++2:0,1 ++2:1,1 ++2:2,1 ++2:3,1 ++2:4,1 ++RC=0 +-- +2.1.0 + diff --git a/pcre.spec b/pcre.spec index e4e3704..14bdc58 100644 --- a/pcre.spec +++ b/pcre.spec @@ -34,6 +34,9 @@ Patch7: pcre-8.36-Fix-comment-between-subroutine-call-and-quantifier-b.patch # Fix compliation of mutual recursion inside a lookbehind assertion, # bug #1210417, in upstream after 8.36 Patch8: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch +# Fix pcregrep loop when \K is used in a lookbehind assertion, bug #1210423, +# in upstream after 8.36 +Patch9: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch BuildRequires: readline-devel # New libtool to get rid of rpath BuildRequires: autoconf, automake, libtool @@ -82,6 +85,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. %patch6 -p1 -b .mutual_recursion %patch7 -p1 -b .parenthesized_comment %patch8 -p1 -b .mutual_recursion_in_assertion +%patch9 -p1 -b .pcregrep_k_loop # Because of rpath patch libtoolize --copy --force && autoreconf -vif # One contributor's name is non-UTF-8 @@ -160,6 +164,7 @@ make %{?_smp_mflags} check VERBOSE=yes - Fix compilation of a parenthesized comment (bug #1210410) - Fix compliation of mutual recursion inside a lookbehind assertion (bug #1210417) +- Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423) * Thu Mar 26 2015 Petr Pisar - 8.36-4 - Fix computing size of JIT read-only data (bug #1206131)