From 308fe6b56afd667f73e93b89bba91538a7e3a911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Mon, 20 Jun 2016 10:28:49 +0200 Subject: [PATCH] Fix repeated pcre2grep output if the -o and -M options were used together and the match extended over a line boundary --- pcre2-10.21-Documentation-clarification.patch | 50 ++++ ...raction-between-o-and-M-in-pcre2grep.patch | 250 ++++++++++++++++++ pcre2.spec | 15 +- 3 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 pcre2-10.21-Documentation-clarification.patch create mode 100644 pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch diff --git a/pcre2-10.21-Documentation-clarification.patch b/pcre2-10.21-Documentation-clarification.patch new file mode 100644 index 0000000..f23b426 --- /dev/null +++ b/pcre2-10.21-Documentation-clarification.patch @@ -0,0 +1,50 @@ +From 6b1349313442390aed681582475f17956c73c9a3 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sun, 19 Jun 2016 16:07:56 +0000 +Subject: [PATCH] Documentation clarification. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 10.21: + +commit cc7cc5af44c220acad1b9addff852a26c0e8ec46 +Author: ph10 +Date: Sun Jun 19 16:07:56 2016 +0000 + + Documentation clarification. + + git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@529 6239d852-aaf2-0410-a92c-79f79f948069 + +Signed-off-by: Petr Písař +--- + doc/pcre2grep.1 | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 +index 028a91e..f7fc40b 100644 +--- a/doc/pcre2grep.1 ++++ b/doc/pcre2grep.1 +@@ -440,10 +440,15 @@ one line. The first is the line in which the match started, and the last is the + line in which the match ended. If the matched string ends with a newline + sequence the output ends at the end of that line. + .sp +-When this option is set, the PCRE2 library is called in "multiline" mode. +-However, \fBpcre2grep\fP still processes the input line by line. 
The difference +-is that a matched string may extend past the end of a line and continue on +-one or more subsequent lines. The newline sequence must be matched as part of ++When this option is set, the PCRE2 library is called in "multiline" mode. This ++allows a matched string to extend past the end of a line and continue on one or ++more subsequent lines. However, \fBpcre2grep\fP still processes the input line ++by line. Once a match has been handled, scanning restarts at the beginning of ++the next line, just as it does when \fB-M\fP is not present. This means that it ++is possible for the second or subsequent lines in a multiline match to be ++output again as part of another match. ++.sp ++The newline sequence that separates multiple lines must be matched as part of + the pattern. For example, to find the phrase "regular expression" in a file + where "regular" might be at the end of a line and "expression" at the start of + the next line, you could use this command: +-- +2.5.5 + diff --git a/pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch b/pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch new file mode 100644 index 0000000..bf8c04c --- /dev/null +++ b/pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch @@ -0,0 +1,250 @@ +From 05686e02ce782f3eee730abcdbe650b17efa2fb1 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Fri, 17 Jun 2016 17:37:26 +0000 +Subject: [PATCH] Fix bad interaction between -o and -M in pcre2grep. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 10.21: + +commit f5e35c292ede59a1d1d32f2f7ee3894515ccc5d6 +Author: ph10 +Date: Fri Jun 17 17:37:26 2016 +0000 + + Fix bad interaction between -o and -M in pcre2grep. 
+ + git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@528 6239d852-aaf2-0410-a92c-79f79f948069 + +Signed-off-by: Petr Písař +--- + RunGrepTest | 12 ++++++++++++ + src/pcre2grep.c | 36 ++++++++++++++++++++++-------------- + testdata/grepinput | 13 +++++++++++++ + testdata/grepoutput | 49 ++++++++++++++++++++++++++++++++++++++++--------- + 4 files changed, 87 insertions(+), 23 deletions(-) + +diff --git a/RunGrepTest b/RunGrepTest +index 67d672b..6bdd157 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -551,6 +551,18 @@ echo "---------------------------- Test 109 -----------------------------" >>tes + (cd $srcdir; $valgrind $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep + echo "RC=$?" >>testtrygrep + ++echo "---------------------------- Test 110 -----------------------------" >>testtrygrep ++(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ ++echo "---------------------------- Test 111 -----------------------------" >>testtrygrep ++(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ ++echo "---------------------------- Test 112 -----------------------------" >>testtrygrep ++(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ + # Now compare the results. + + $cf $srcdir/testdata/grepoutput testtrygrep +diff --git a/src/pcre2grep.c b/src/pcre2grep.c +index 703f675..3a5efd4 100644 +--- a/src/pcre2grep.c ++++ b/src/pcre2grep.c +@@ -1591,7 +1591,7 @@ while (ptr < endptr) + size_t startoffset = 0; + + /* At this point, ptr is at the start of a line. We need to find the length +- of the subject string to pass to pcre_exec(). In multiline mode, it is the ++ of the subject string to pass to pcre2_match(). 
In multiline mode, it is the + length remainder of the data in the buffer. Otherwise, it is the length of + the next line, excluding the terminating newline. After matching, we always + advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE +@@ -1680,7 +1680,7 @@ while (ptr < endptr) + + match = match_patterns(matchptr, length, options, startoffset, &mrc); + options = PCRE2_NOTEMPTY; +- ++ + /* If it's a match or a not-match (as required), do what's wanted. */ + + if (match != invert) +@@ -1776,14 +1776,22 @@ while (ptr < endptr) + if (printed || printname != NULL || number) fprintf(stdout, "\n"); + } + +- /* Prepare to repeat to find the next match. If the pattern contained a +- lookbehind that included \K, it is possible that the end of the match +- might be at or before the actual starting offset we have just used. In +- this case, start one character further on. */ ++ /* Prepare to repeat to find the next match in the line. */ + + match = FALSE; + if (line_buffered) fflush(stdout); + rc = 0; /* Had some success */ ++ ++ /* If the current match ended past the end of the line (only possible ++ in multiline mode), we are done with this line. */ ++ ++ if (offsets[1] > linelength) goto END_ONE_MATCH; ++ ++ /* If the pattern contained a lookbehind that included \K, it is ++ possible that the end of the match might be at or before the actual ++ starting offset we have just used. In this case, start one character ++ further on. 
*/ ++ + startoffset = offsets[1]; /* Restart after the match */ + oldstartoffset = pcre2_get_startchar(match_data); + if (startoffset <= oldstartoffset) +@@ -2488,24 +2496,24 @@ if ((popts & PO_FIXED_STRINGS) != 0) + } + + sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); +-p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, +- compile_context); +- ++p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode, ++ &erroffset, compile_context); ++ + /* Handle successful compile */ +- +-if (p->compiled != NULL) ++ ++if (p->compiled != NULL) + { + #ifdef SUPPORT_PCRE2GREP_JIT + if (use_jit) + { + errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); + if (errcode == 0) return TRUE; +- erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ ++ erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ + } +- else ++ else + #endif + return TRUE; +- } ++ } + + /* Handle compile and JIT compile errors */ + +diff --git a/testdata/grepinput b/testdata/grepinput +index 0f00edd..b01643d 100644 +--- a/testdata/grepinput ++++ b/testdata/grepinput +@@ -604,6 +604,19 @@ AB.VE the turtle + + 010203040506 + ++match 1: ++ a ++match 2: ++ b ++match 3: ++ c ++match 4: ++ d ++match 5: ++ e ++Rhubarb ++Custard Tart ++ + PUT NEW DATA ABOVE THIS LINE. + ============================= + +diff --git a/testdata/grepoutput b/testdata/grepoutput +index 3f6704c..552bd70 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -10,7 +10,7 @@ RC=0 + 7:PATTERN at the start of a line. + 8:In the middle of a line, PATTERN appears. + 10:This pattern is in lower case. +-610:Check up on PATTERN near the end. ++623:Check up on PATTERN near the end. + RC=0 + ---------------------------- Test 4 ------------------------------ + 4 +@@ -19,7 +19,7 @@ RC=0 + ./testdata/grepinput:7:PATTERN at the start of a line. + ./testdata/grepinput:8:In the middle of a line, PATTERN appears. 
+ ./testdata/grepinput:10:This pattern is in lower case. +-./testdata/grepinput:610:Check up on PATTERN near the end. ++./testdata/grepinput:623:Check up on PATTERN near the end. + ./testdata/grepinputx:3:Here is the pattern again. + ./testdata/grepinputx:5:Pattern + ./testdata/grepinputx:42:This line contains pattern not on a line by itself. +@@ -28,7 +28,7 @@ RC=0 + 7:PATTERN at the start of a line. + 8:In the middle of a line, PATTERN appears. + 10:This pattern is in lower case. +-610:Check up on PATTERN near the end. ++623:Check up on PATTERN near the end. + 3:Here is the pattern again. + 5:Pattern + 42:This line contains pattern not on a line by itself. +@@ -324,10 +324,10 @@ RC=0 + ./testdata/grepinput-9- + ./testdata/grepinput:10:This pattern is in lower case. + -- +-./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE. +-./testdata/grepinput-608-============================= +-./testdata/grepinput-609- +-./testdata/grepinput:610:Check up on PATTERN near the end. ++./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. ++./testdata/grepinput-621-============================= ++./testdata/grepinput-622- ++./testdata/grepinput:623:Check up on PATTERN near the end. + -- + ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. + ./testdata/grepinputx-2- +@@ -349,8 +349,8 @@ RC=0 + ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long. + ./testdata/grepinput-13- + -- +-./testdata/grepinput:610:Check up on PATTERN near the end. +-./testdata/grepinput-611-This is the last line of this file. ++./testdata/grepinput:623:Check up on PATTERN near the end. ++./testdata/grepinput-624-This is the last line of this file. + -- + ./testdata/grepinputx:3:Here is the pattern again. 
+ ./testdata/grepinputx-4- +@@ -755,3 +755,34 @@ RC=0 + RC=0 + ---------------------------- Test 109 ----------------------------- + RC=0 ++---------------------------- Test 110 ----------------------------- ++match 1: ++ a ++/1/a ++match 2: ++ b ++/2/b ++match 3: ++ c ++/3/c ++match 4: ++ d ++/4/d ++match 5: ++ e ++/5/e ++RC=0 ++---------------------------- Test 111 ----------------------------- ++607:0,12 ++609:0,12 ++611:0,12 ++613:0,12 ++615:0,12 ++RC=0 ++---------------------------- Test 112 ----------------------------- ++37168,12 ++37180,12 ++37192,12 ++37204,12 ++37216,12 ++RC=0 +-- +2.5.5 + diff --git a/pcre2.spec b/pcre2.spec index b35feae..47413f4 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -2,7 +2,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.21 -Release: %{?rcversion:0.}5%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -36,6 +36,13 @@ Patch5: pcre2-10.21-A-racing-condition-is-fixed-in-JIT-reported-by-Mozil.pat Patch6: pcre2-10.21-Fix-typo-in-test-program.patch # Enable JIT in the pcre2grep tool, fixed in upstream after 10.21 Patch7: pcre2-10.21-Make-pcre2grep-use-JIT-it-was-omitted-by-mistake.patch +# Fix repeated pcre2grep output if the -o and -M options were used together +# and the match extended over a line boundary, upstream bug #1848, fixed in +# upstream after 10.21 +Patch8: pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch +# Documentation for Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch, +# upstream bug #1848, fixed in upstream after 10.21 +Patch9: pcre2-10.21-Documentation-clarification.patch # New libtool to get rid of RPATH and to use distribution autotools BuildRequires: autoconf @@ -121,6 +128,8 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. 
 %patch5 -p1 %patch6 -p1 %patch7 -p1 +%patch8 -p1 +%patch9 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -216,6 +225,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Mon Jun 20 2016 Petr Pisar - 10.21-6 +- Fix repeated pcre2grep output if the -o and -M options were used together + and the match extended over a line boundary (upstream bug #1848) + * Fri Jun 03 2016 Petr Pisar - 10.21-5 - Fix a race in JIT locking condition - Fix an ovector check in JIT test program