Fix repeated pcre2grep output if the -o and -M options were used and the match extended over a line boundary
This commit is contained in:
parent
6ca7a15ebe
commit
308fe6b56a
50
pcre2-10.21-Documentation-clarification.patch
Normal file
50
pcre2-10.21-Documentation-clarification.patch
Normal file
@ -0,0 +1,50 @@
|
||||
From 6b1349313442390aed681582475f17956c73c9a3 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sun, 19 Jun 2016 16:07:56 +0000
|
||||
Subject: [PATCH] Documentation clarification.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Ported to 10.21:
|
||||
|
||||
commit cc7cc5af44c220acad1b9addff852a26c0e8ec46
|
||||
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sun Jun 19 16:07:56 2016 +0000
|
||||
|
||||
Documentation clarification.
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@529 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
doc/pcre2grep.1 | 13 +++++++++----
|
||||
1 file changed, 9 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
|
||||
index 028a91e..f7fc40b 100644
|
||||
--- a/doc/pcre2grep.1
|
||||
+++ b/doc/pcre2grep.1
|
||||
@@ -440,10 +440,15 @@ one line. The first is the line in which the match started, and the last is the
|
||||
line in which the match ended. If the matched string ends with a newline
|
||||
sequence the output ends at the end of that line.
|
||||
.sp
|
||||
-When this option is set, the PCRE2 library is called in "multiline" mode.
|
||||
-However, \fBpcre2grep\fP still processes the input line by line. The difference
|
||||
-is that a matched string may extend past the end of a line and continue on
|
||||
-one or more subsequent lines. The newline sequence must be matched as part of
|
||||
+When this option is set, the PCRE2 library is called in "multiline" mode. This
|
||||
+allows a matched string to extend past the end of a line and continue on one or
|
||||
+more subsequent lines. However, \fBpcre2grep\fP still processes the input line
|
||||
+by line. Once a match has been handled, scanning restarts at the beginning of
|
||||
+the next line, just as it does when \fB-M\fP is not present. This means that it
|
||||
+is possible for the second or subsequent lines in a multiline match to be
|
||||
+output again as part of another match.
|
||||
+.sp
|
||||
+The newline sequence that separates multiple lines must be matched as part of
|
||||
the pattern. For example, to find the phrase "regular expression" in a file
|
||||
where "regular" might be at the end of a line and "expression" at the start of
|
||||
the next line, you could use this command:
|
||||
--
|
||||
2.5.5
|
||||
|
@ -0,0 +1,250 @@
|
||||
From 05686e02ce782f3eee730abcdbe650b17efa2fb1 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Fri, 17 Jun 2016 17:37:26 +0000
|
||||
Subject: [PATCH] Fix bad interaction between -o and -M in pcre2grep.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Ported to 10.21:
|
||||
|
||||
commit f5e35c292ede59a1d1d32f2f7ee3894515ccc5d6
|
||||
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Fri Jun 17 17:37:26 2016 +0000
|
||||
|
||||
Fix bad interaction between -o and -M in pcre2grep.
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@528 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
RunGrepTest | 12 ++++++++++++
|
||||
src/pcre2grep.c | 36 ++++++++++++++++++++++--------------
|
||||
testdata/grepinput | 13 +++++++++++++
|
||||
testdata/grepoutput | 49 ++++++++++++++++++++++++++++++++++++++++---------
|
||||
4 files changed, 87 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/RunGrepTest b/RunGrepTest
|
||||
index 67d672b..6bdd157 100755
|
||||
--- a/RunGrepTest
|
||||
+++ b/RunGrepTest
|
||||
@@ -551,6 +551,18 @@ echo "---------------------------- Test 109 -----------------------------" >>tes
|
||||
(cd $srcdir; $valgrind $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
+echo "---------------------------- Test 110 -----------------------------" >>testtrygrep
|
||||
+(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
|
||||
+echo "RC=$?" >>testtrygrep
|
||||
+
|
||||
+echo "---------------------------- Test 111 -----------------------------" >>testtrygrep
|
||||
+(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
|
||||
+echo "RC=$?" >>testtrygrep
|
||||
+
|
||||
+echo "---------------------------- Test 112 -----------------------------" >>testtrygrep
|
||||
+(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
|
||||
+echo "RC=$?" >>testtrygrep
|
||||
+
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
|
||||
index 703f675..3a5efd4 100644
|
||||
--- a/src/pcre2grep.c
|
||||
+++ b/src/pcre2grep.c
|
||||
@@ -1591,7 +1591,7 @@ while (ptr < endptr)
|
||||
size_t startoffset = 0;
|
||||
|
||||
/* At this point, ptr is at the start of a line. We need to find the length
|
||||
- of the subject string to pass to pcre_exec(). In multiline mode, it is the
|
||||
+ of the subject string to pass to pcre2_match(). In multiline mode, it is the
|
||||
length remainder of the data in the buffer. Otherwise, it is the length of
|
||||
the next line, excluding the terminating newline. After matching, we always
|
||||
advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
|
||||
@@ -1680,7 +1680,7 @@ while (ptr < endptr)
|
||||
|
||||
match = match_patterns(matchptr, length, options, startoffset, &mrc);
|
||||
options = PCRE2_NOTEMPTY;
|
||||
-
|
||||
+
|
||||
/* If it's a match or a not-match (as required), do what's wanted. */
|
||||
|
||||
if (match != invert)
|
||||
@@ -1776,14 +1776,22 @@ while (ptr < endptr)
|
||||
if (printed || printname != NULL || number) fprintf(stdout, "\n");
|
||||
}
|
||||
|
||||
- /* Prepare to repeat to find the next match. If the pattern contained a
|
||||
- lookbehind that included \K, it is possible that the end of the match
|
||||
- might be at or before the actual starting offset we have just used. In
|
||||
- this case, start one character further on. */
|
||||
+ /* Prepare to repeat to find the next match in the line. */
|
||||
|
||||
match = FALSE;
|
||||
if (line_buffered) fflush(stdout);
|
||||
rc = 0; /* Had some success */
|
||||
+
|
||||
+ /* If the current match ended past the end of the line (only possible
|
||||
+ in multiline mode), we are done with this line. */
|
||||
+
|
||||
+ if (offsets[1] > linelength) goto END_ONE_MATCH;
|
||||
+
|
||||
+ /* If the pattern contained a lookbehind that included \K, it is
|
||||
+ possible that the end of the match might be at or before the actual
|
||||
+ starting offset we have just used. In this case, start one character
|
||||
+ further on. */
|
||||
+
|
||||
startoffset = offsets[1]; /* Restart after the match */
|
||||
oldstartoffset = pcre2_get_startchar(match_data);
|
||||
if (startoffset <= oldstartoffset)
|
||||
@@ -2488,24 +2496,24 @@ if ((popts & PO_FIXED_STRINGS) != 0)
|
||||
}
|
||||
|
||||
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
|
||||
-p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
|
||||
- compile_context);
|
||||
-
|
||||
+p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
|
||||
+ &erroffset, compile_context);
|
||||
+
|
||||
/* Handle successful compile */
|
||||
-
|
||||
-if (p->compiled != NULL)
|
||||
+
|
||||
+if (p->compiled != NULL)
|
||||
{
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
if (use_jit)
|
||||
{
|
||||
errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
|
||||
if (errcode == 0) return TRUE;
|
||||
- erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */
|
||||
+ erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */
|
||||
}
|
||||
- else
|
||||
+ else
|
||||
#endif
|
||||
return TRUE;
|
||||
- }
|
||||
+ }
|
||||
|
||||
/* Handle compile and JIT compile errors */
|
||||
|
||||
diff --git a/testdata/grepinput b/testdata/grepinput
|
||||
index 0f00edd..b01643d 100644
|
||||
--- a/testdata/grepinput
|
||||
+++ b/testdata/grepinput
|
||||
@@ -604,6 +604,19 @@ AB.VE the turtle
|
||||
|
||||
010203040506
|
||||
|
||||
+match 1:
|
||||
+ a
|
||||
+match 2:
|
||||
+ b
|
||||
+match 3:
|
||||
+ c
|
||||
+match 4:
|
||||
+ d
|
||||
+match 5:
|
||||
+ e
|
||||
+Rhubarb
|
||||
+Custard Tart
|
||||
+
|
||||
PUT NEW DATA ABOVE THIS LINE.
|
||||
=============================
|
||||
|
||||
diff --git a/testdata/grepoutput b/testdata/grepoutput
|
||||
index 3f6704c..552bd70 100644
|
||||
--- a/testdata/grepoutput
|
||||
+++ b/testdata/grepoutput
|
||||
@@ -10,7 +10,7 @@ RC=0
|
||||
7:PATTERN at the start of a line.
|
||||
8:In the middle of a line, PATTERN appears.
|
||||
10:This pattern is in lower case.
|
||||
-610:Check up on PATTERN near the end.
|
||||
+623:Check up on PATTERN near the end.
|
||||
RC=0
|
||||
---------------------------- Test 4 ------------------------------
|
||||
4
|
||||
@@ -19,7 +19,7 @@ RC=0
|
||||
./testdata/grepinput:7:PATTERN at the start of a line.
|
||||
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
|
||||
./testdata/grepinput:10:This pattern is in lower case.
|
||||
-./testdata/grepinput:610:Check up on PATTERN near the end.
|
||||
+./testdata/grepinput:623:Check up on PATTERN near the end.
|
||||
./testdata/grepinputx:3:Here is the pattern again.
|
||||
./testdata/grepinputx:5:Pattern
|
||||
./testdata/grepinputx:42:This line contains pattern not on a line by itself.
|
||||
@@ -28,7 +28,7 @@ RC=0
|
||||
7:PATTERN at the start of a line.
|
||||
8:In the middle of a line, PATTERN appears.
|
||||
10:This pattern is in lower case.
|
||||
-610:Check up on PATTERN near the end.
|
||||
+623:Check up on PATTERN near the end.
|
||||
3:Here is the pattern again.
|
||||
5:Pattern
|
||||
42:This line contains pattern not on a line by itself.
|
||||
@@ -324,10 +324,10 @@ RC=0
|
||||
./testdata/grepinput-9-
|
||||
./testdata/grepinput:10:This pattern is in lower case.
|
||||
--
|
||||
-./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE.
|
||||
-./testdata/grepinput-608-=============================
|
||||
-./testdata/grepinput-609-
|
||||
-./testdata/grepinput:610:Check up on PATTERN near the end.
|
||||
+./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE.
|
||||
+./testdata/grepinput-621-=============================
|
||||
+./testdata/grepinput-622-
|
||||
+./testdata/grepinput:623:Check up on PATTERN near the end.
|
||||
--
|
||||
./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
|
||||
./testdata/grepinputx-2-
|
||||
@@ -349,8 +349,8 @@ RC=0
|
||||
./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
|
||||
./testdata/grepinput-13-
|
||||
--
|
||||
-./testdata/grepinput:610:Check up on PATTERN near the end.
|
||||
-./testdata/grepinput-611-This is the last line of this file.
|
||||
+./testdata/grepinput:623:Check up on PATTERN near the end.
|
||||
+./testdata/grepinput-624-This is the last line of this file.
|
||||
--
|
||||
./testdata/grepinputx:3:Here is the pattern again.
|
||||
./testdata/grepinputx-4-
|
||||
@@ -755,3 +755,34 @@ RC=0
|
||||
RC=0
|
||||
---------------------------- Test 109 -----------------------------
|
||||
RC=0
|
||||
+---------------------------- Test 110 -----------------------------
|
||||
+match 1:
|
||||
+ a
|
||||
+/1/a
|
||||
+match 2:
|
||||
+ b
|
||||
+/2/b
|
||||
+match 3:
|
||||
+ c
|
||||
+/3/c
|
||||
+match 4:
|
||||
+ d
|
||||
+/4/d
|
||||
+match 5:
|
||||
+ e
|
||||
+/5/e
|
||||
+RC=0
|
||||
+---------------------------- Test 111 -----------------------------
|
||||
+607:0,12
|
||||
+609:0,12
|
||||
+611:0,12
|
||||
+613:0,12
|
||||
+615:0,12
|
||||
+RC=0
|
||||
+---------------------------- Test 112 -----------------------------
|
||||
+37168,12
|
||||
+37180,12
|
||||
+37192,12
|
||||
+37204,12
|
||||
+37216,12
|
||||
+RC=0
|
||||
--
|
||||
2.5.5
|
||||
|
15
pcre2.spec
15
pcre2.spec
@ -2,7 +2,7 @@
|
||||
#%%global rcversion RC1
|
||||
Name: pcre2
|
||||
Version: 10.21
|
||||
Release: %{?rcversion:0.}5%{?rcversion:.%rcversion}%{?dist}
|
||||
Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist}
|
||||
%global myversion %{version}%{?rcversion:-%rcversion}
|
||||
Summary: Perl-compatible regular expression library
|
||||
Group: System Environment/Libraries
|
||||
@ -36,6 +36,13 @@ Patch5: pcre2-10.21-A-racing-condition-is-fixed-in-JIT-reported-by-Mozil.pat
|
||||
Patch6: pcre2-10.21-Fix-typo-in-test-program.patch
|
||||
# Enable JIT in the pcre2grep tool, fixed in upstream after 10.21
|
||||
Patch7: pcre2-10.21-Make-pcre2grep-use-JIT-it-was-omitted-by-mistake.patch
|
||||
# Fix repeated pcre2grep output if the -o and -M options were used and the match
|
||||
# extended over a line boundary, upstream bug #1848, fixed in upstream after
|
||||
# 10.21
|
||||
Patch8: pcre2-10.21-Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch
|
||||
# Documentation for Fix-bad-interaction-between-o-and-M-in-pcre2grep.patch,
|
||||
# upstream bug #1848, fixed in upstream after 10.21
|
||||
Patch9: pcre2-10.21-Documentation-clarification.patch
|
||||
|
||||
# New libtool to get rid of RPATH and to use distribution autotools
|
||||
BuildRequires: autoconf
|
||||
@ -121,6 +128,8 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch8 -p1
|
||||
%patch9 -p1
|
||||
# Because of multilib patch
|
||||
libtoolize --copy --force
|
||||
autoreconf -vif
|
||||
@ -216,6 +225,10 @@ make %{?_smp_mflags} check VERBOSE=yes
|
||||
%{_mandir}/man1/pcre2test.*
|
||||
|
||||
%changelog
|
||||
* Mon Jun 20 2016 Petr Pisar <ppisar@redhat.com> - 10.21-6
|
||||
- Fix repeated pcre2grep output if the -o and -M options were used and the match
|
||||
extended over a line boundary (upstream bug #1848)
|
||||
|
||||
* Fri Jun 03 2016 Petr Pisar <ppisar@redhat.com> - 10.21-5
|
||||
- Fix a race in JIT locking condition
|
||||
- Fix an ovector check in JIT test program
|
||||
|
Loading…
Reference in New Issue
Block a user