384 lines
14 KiB
Diff
384 lines
14 KiB
Diff
|
From eff8c9e117259b1192919b85c2ee03a27b164f1a Mon Sep 17 00:00:00 2001
|
|||
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
|||
|
Date: Mon, 13 Nov 2017 16:52:39 +0000
|
|||
|
Subject: [PATCH] Fix multiple multiline matching issues in pcre2grep.
|
|||
|
MIME-Version: 1.0
|
|||
|
Content-Type: text/plain; charset=UTF-8
|
|||
|
Content-Transfer-Encoding: 8bit
|
|||
|
|
|||
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@878 6239d852-aaf2-0410-a92c-79f79f948069
|
|||
|
|
|||
|
Petr Písař: Ported to 10.30.
|
|||
|
|
|||
|
diff --git a/RunGrepTest b/RunGrepTest
|
|||
|
index cf88c78..a7496cb 100755
|
|||
|
--- a/RunGrepTest
|
|||
|
+++ b/RunGrepTest
|
|||
|
@@ -248,7 +248,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
|||
|
@@ -391,6 +391,12 @@ echo "RC=$?" >>testtrygrep
|
|||
|
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
|
|||
|
(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
|
|||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
|||
|
@@ -494,25 +500,25 @@ echo "---------------------------- Test 95 -----------------------------" >>test
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinputM 'fox' ./test* | sort) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
|||
|
echo "grepinput$" >testtemp1grep
|
|||
|
echo "grepinput8" >>testtemp1grep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
|||
|
echo "grepinput$" >testtemp1grep
|
|||
|
echo "grepinput8" >>testtemp1grep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinputM --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
|||
|
echo "grepinput$" >testtemp1grep
|
|||
|
echo "grepinput8" >testtemp2grep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
|||
|
@@ -582,7 +588,7 @@ echo "---------------------------- Test 115 -----------------------------" >>tes
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
|
|||
|
-(cd $srcdir; $valgrind $vjs $pcre2grep -th 'the' testdata/grepinput*) >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinputM -th 'the' testdata/grepinput*) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
|
|||
|
@@ -610,10 +616,20 @@ echo "---------------------------- Test 122 -----------------------------" >>tes
|
|||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
-echo "---------------------------- Test 122 -----------------------------" >>testtrygrep
|
|||
|
+echo "---------------------------- Test 123 -----------------------------" >>testtrygrep
|
|||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep
|
|||
|
echo "RC=$?" >>testtrygrep
|
|||
|
|
|||
|
+echo "---------------------------- Test 124 -----------------------------" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always 'start[\s]+end' testdata/grepinputM) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
|
|||
|
+echo "RC=$?" >>testtrygrep
|
|||
|
+
|
|||
|
|
|||
|
# Now compare the results.
|
|||
|
|
|||
|
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
|
|||
|
index 1649d5a..bec07e1 100644
|
|||
|
--- a/src/pcre2grep.c
|
|||
|
+++ b/src/pcre2grep.c
|
|||
|
@@ -2505,7 +2505,10 @@ while (ptr < endptr)
|
|||
|
match = match_patterns(ptr, length, options, startoffset, &mrc);
|
|||
|
options = PCRE2_NOTEMPTY;
|
|||
|
|
|||
|
- /* If it's a match or a not-match (as required), do what's wanted. */
|
|||
|
+ /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
|
|||
|
+ only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
|
|||
|
+ return code - to output data lines, so that binary zeroes are treated as just
|
|||
|
+ another data character. */
|
|||
|
|
|||
|
if (match != invert)
|
|||
|
{
|
|||
|
@@ -2734,27 +2737,6 @@ while (ptr < endptr)
|
|||
|
if (printname != NULL) fprintf(stdout, "%s:", printname);
|
|||
|
if (number) fprintf(stdout, "%d:", linenumber);
|
|||
|
|
|||
|
- /* In multiline mode, we want to print to the end of the line in which
|
|||
|
- the end of the matched string is found, so we adjust linelength and the
|
|||
|
- line number appropriately, but only when there actually was a match
|
|||
|
- (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
|
|||
|
- the match will always be before the first newline sequence. */
|
|||
|
-
|
|||
|
- if (multiline & !invert)
|
|||
|
- {
|
|||
|
- char *endmatch = ptr + offsets[1];
|
|||
|
- t = ptr;
|
|||
|
- while (t <= endmatch)
|
|||
|
- {
|
|||
|
- t = end_of_line(t, endptr, &endlinelength);
|
|||
|
- if (t < endmatch) linenumber++; else break;
|
|||
|
- }
|
|||
|
- linelength = t - ptr - endlinelength;
|
|||
|
- }
|
|||
|
-
|
|||
|
- /*** NOTE: Use only fwrite() to output the data line, so that binary
|
|||
|
- zeroes are treated as just another data character. */
|
|||
|
-
|
|||
|
/* This extra option, for Jeffrey Friedl's debugging requirements,
|
|||
|
replaces the matched string, or a specific captured string if it exists,
|
|||
|
with X. When this happens, colouring is ignored. */
|
|||
|
@@ -2771,20 +2753,48 @@ while (ptr < endptr)
|
|||
|
else
|
|||
|
#endif
|
|||
|
|
|||
|
- /* We have to split the line(s) up if colouring, and search for further
|
|||
|
- matches, but not of course if the line is a non-match. */
|
|||
|
+ /* In multiline mode, or if colouring, we have to split the line(s) up
|
|||
|
+ and search for further matches, but not of course if the line is a
|
|||
|
+ non-match. In multiline mode this is necessary in case there is another
|
|||
|
+ match that spans the end of the current line. When colouring we want to
|
|||
|
+ colour all matches. */
|
|||
|
|
|||
|
- if (do_colour && !invert)
|
|||
|
+ if ((multiline || do_colour) && !invert)
|
|||
|
{
|
|||
|
int plength;
|
|||
|
FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
|
|||
|
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
|
|||
|
for (;;)
|
|||
|
{
|
|||
|
- startoffset = offsets[1];
|
|||
|
- if (startoffset >= linelength + endlinelength ||
|
|||
|
- !match_patterns(ptr, length, options, startoffset, &mrc))
|
|||
|
- break;
|
|||
|
+ startoffset = offsets[1]; /* Advance after previous match. */
|
|||
|
+
|
|||
|
+ /* If the current match ended past the end of the line (only possible
|
|||
|
+ in multiline mode), we must move on to the line in which it did end
|
|||
|
+ before searching for more matches. Because the PCRE2_FIRSTLINE option
|
|||
|
+ is set, the start of the match will always be before the first
|
|||
|
+ newline sequence. */
|
|||
|
+
|
|||
|
+ while (startoffset > linelength + endlinelength)
|
|||
|
+ {
|
|||
|
+ ptr += linelength + endlinelength;
|
|||
|
+ filepos += (int)(linelength + endlinelength);
|
|||
|
+ linenumber++;
|
|||
|
+ startoffset -= (int)(linelength + endlinelength);
|
|||
|
+ t = end_of_line(ptr, endptr, &endlinelength);
|
|||
|
+ linelength = t - ptr - endlinelength;
|
|||
|
+ length = (size_t)(endptr - ptr);
|
|||
|
+ }
|
|||
|
+
|
|||
|
+ /* If startoffset is at the exact end of the line it means this
|
|||
|
+ complete line was the final part of the match, so there is nothing
|
|||
|
+ more to do. */
|
|||
|
+
|
|||
|
+ if (startoffset == linelength + endlinelength) break;
|
|||
|
+
|
|||
|
+ /* Otherwise, run a match from within the final line, and if found,
|
|||
|
+ loop for any that may follow. */
|
|||
|
+
|
|||
|
+ if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
|
|||
|
FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
|
|||
|
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
|
|||
|
}
|
|||
|
@@ -2797,7 +2807,7 @@ while (ptr < endptr)
|
|||
|
if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
|
|||
|
}
|
|||
|
|
|||
|
- /* Not colouring; no need to search for further matches */
|
|||
|
+ /* Not colouring or multiline; no need to search for further matches. */
|
|||
|
|
|||
|
else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
|
|||
|
}
|
|||
|
diff --git a/testdata/grepinputM b/testdata/grepinputM
|
|||
|
new file mode 100644
|
|||
|
index 0000000..9119e3d
|
|||
|
--- /dev/null
|
|||
|
+++ b/testdata/grepinputM
|
|||
|
@@ -0,0 +1,17 @@
|
|||
|
+Data file for multiline tests of multiple matches.
|
|||
|
+
|
|||
|
+start end in between start
|
|||
|
+end and following
|
|||
|
+Other stuff
|
|||
|
+
|
|||
|
+start end in between start
|
|||
|
+end and following start
|
|||
|
+end other stuff
|
|||
|
+
|
|||
|
+start end in between start
|
|||
|
+
|
|||
|
+end
|
|||
|
+
|
|||
|
+** These two lines must be last.
|
|||
|
+start end in between start
|
|||
|
+end
|
|||
|
diff --git a/testdata/grepoutput b/testdata/grepoutput
|
|||
|
index 52e0d17..7e963fb 100644
|
|||
|
--- a/testdata/grepoutput
|
|||
|
+++ b/testdata/grepoutput
|
|||
|
@@ -487,6 +487,7 @@ RC=0
|
|||
|
./testdata/grepinput:456
|
|||
|
./testdata/grepinput3:0
|
|||
|
./testdata/grepinput8:0
|
|||
|
+./testdata/grepinputM:0
|
|||
|
./testdata/grepinputv:1
|
|||
|
./testdata/grepinputx:0
|
|||
|
RC=0
|
|||
|
@@ -600,6 +601,33 @@ RC=0
|
|||
|
[0m[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
|
|||
|
[0mRC=0
|
|||
|
+1:[1;31mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+[0m6:[1;31mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+[0m8:[1;31mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+[0m13:[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+[0mRC=0
|
|||
|
+triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+RC=0
|
|||
|
+1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
|
|||
|
+
|
|||
|
+RC=0
|
|||
|
---------------------------- Test 71 -----------------------------
|
|||
|
01
|
|||
|
RC=0
|
|||
|
@@ -793,21 +821,23 @@ RC=0
|
|||
|
37216,12
|
|||
|
RC=0
|
|||
|
---------------------------- Test 113 -----------------------------
|
|||
|
-478
|
|||
|
+480
|
|||
|
RC=0
|
|||
|
---------------------------- Test 114 -----------------------------
|
|||
|
testdata/grepinput:469
|
|||
|
testdata/grepinput3:0
|
|||
|
testdata/grepinput8:0
|
|||
|
+testdata/grepinputM:2
|
|||
|
testdata/grepinputv:3
|
|||
|
testdata/grepinputx:6
|
|||
|
-TOTAL:478
|
|||
|
+TOTAL:480
|
|||
|
RC=0
|
|||
|
---------------------------- Test 115 -----------------------------
|
|||
|
testdata/grepinput:469
|
|||
|
+testdata/grepinputM:2
|
|||
|
testdata/grepinputv:3
|
|||
|
testdata/grepinputx:6
|
|||
|
-TOTAL:478
|
|||
|
+TOTAL:480
|
|||
|
RC=0
|
|||
|
---------------------------- Test 116 -----------------------------
|
|||
|
478
|
|||
|
@@ -816,9 +846,10 @@ RC=0
|
|||
|
469
|
|||
|
0
|
|||
|
0
|
|||
|
+2
|
|||
|
3
|
|||
|
6
|
|||
|
-478
|
|||
|
+480
|
|||
|
RC=0
|
|||
|
---------------------------- Test 118 -----------------------------
|
|||
|
testdata/grepinput3
|
|||
|
@@ -846,7 +877,62 @@ RC=0
|
|||
|
over the lazy dog.
|
|||
|
The word is cat in this line
|
|||
|
RC=0
|
|||
|
----------------------------- Test 122 -----------------------------
|
|||
|
+---------------------------- Test 123 -----------------------------
|
|||
|
over the lazy dog.
|
|||
|
The word is cat in this line
|
|||
|
RC=0
|
|||
|
+---------------------------- Test 124 -----------------------------
|
|||
|
+3:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m and following
|
|||
|
+7:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m and following [1;31mstart
|
|||
|
+end[0m other stuff
|
|||
|
+11:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+
|
|||
|
+end[0m
|
|||
|
+16:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m
|
|||
|
+RC=0
|
|||
|
+3:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m and following
|
|||
|
+5-Other stuff
|
|||
|
+6-
|
|||
|
+7:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m and following [1;31mstart
|
|||
|
+end[0m other stuff
|
|||
|
+10-
|
|||
|
+11:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+
|
|||
|
+end[0m
|
|||
|
+14-
|
|||
|
+15-** These two lines must be last.
|
|||
|
+16:[1;31mstart end[0m in between [1;31mstart
|
|||
|
+end[0m
|
|||
|
+RC=0
|
|||
|
+3:start end in between start
|
|||
|
+end and following
|
|||
|
+7:start end in between start
|
|||
|
+end and following start
|
|||
|
+end other stuff
|
|||
|
+11:start end in between start
|
|||
|
+
|
|||
|
+end
|
|||
|
+16:start end in between start
|
|||
|
+end
|
|||
|
+RC=0
|
|||
|
+3:start end in between start
|
|||
|
+end and following
|
|||
|
+5-Other stuff
|
|||
|
+6-
|
|||
|
+7:start end in between start
|
|||
|
+end and following start
|
|||
|
+end other stuff
|
|||
|
+10-
|
|||
|
+11:start end in between start
|
|||
|
+
|
|||
|
+end
|
|||
|
+14-
|
|||
|
+15-** These two lines must be last.
|
|||
|
+16:start end in between start
|
|||
|
+end
|
|||
|
+RC=0
|
|||
|
--
|
|||
|
2.13.6
|
|||
|
|