pcre2/pcre2-10.30-Fix-K-issues-in-pcre2grep.patch

150 lines
5.4 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From a109c9e35a040a7a8032c12ce7396bc949f3f735 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 26 Dec 2017 15:10:04 +0000
Subject: [PATCH 3/3] Fix \K issues in pcre2grep.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@896 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.30.
---
RunGrepTest | 11 +++++++++++
src/pcre2grep.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
testdata/grepoutput | 9 +++++++++
diff --git a/RunGrepTest b/RunGrepTest
index a7496cb..a26f677 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -630,6 +630,17 @@ echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
+printf "abcd\n" >testNinputgrep
+$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
# Now compare the results.
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index e764313..02339f5 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -1607,7 +1607,7 @@ Returns: nothing
*/
static void
-do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
+do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
char *endptr, const char *printname)
{
if (after_context > 0 && lastmatchnumber > 0)
@@ -2764,11 +2764,38 @@ while (ptr < endptr)
if ((multiline || do_colour) && !invert)
{
int plength;
+ PCRE2_SIZE endprevious;
+
+ /* The use of \K may make the end offset earlier than the start. In
+ this situation, swap them round. */
+
+ if (offsets[0] > offsets[1])
+ {
+ PCRE2_SIZE temp = offsets[0];
+ offsets[0] = offsets[1];
+ offsets[1] = temp;
+ }
+
FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
+
for (;;)
{
- startoffset = offsets[1]; /* Advance after previous match. */
+ PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
+
+ endprevious = offsets[1];
+ startoffset = endprevious; /* Advance after previous match. */
+
+ /* If the pattern contained a lookbehind that included \K, it is
+ possible that the end of the match might be at or before the actual
+ starting offset we have just used. In this case, start one character
+ further on. */
+
+ if (startoffset <= oldstartoffset)
+ {
+ startoffset = oldstartoffset + 1;
+ if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
+ }
/* If the current match ended past the end of the line (only possible
in multiline mode), we must move on to the line in which it did end
@@ -2782,6 +2809,7 @@ while (ptr < endptr)
filepos += (int)(linelength + endlinelength);
linenumber++;
startoffset -= (int)(linelength + endlinelength);
+ endprevious -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
length = (size_t)(endptr - ptr);
@@ -2797,7 +2825,18 @@ while (ptr < endptr)
loop for any that may follow. */
if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
- FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
+
+ /* The use of \K may make the end offset earlier than the start. In
+ this situation, swap them round. */
+
+ if (offsets[0] > offsets[1])
+ {
+ PCRE2_SIZE temp = offsets[0];
+ offsets[0] = offsets[1];
+ offsets[1] = temp;
+ }
+
+ FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
}
@@ -2805,8 +2844,8 @@ while (ptr < endptr)
and its line-ending characters (if they matched the pattern), so there
may be no more to print. */
- plength = (int)((linelength + endlinelength) - startoffset);
- if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
+ plength = (int)((linelength + endlinelength) - endprevious);
+ if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
}
/* Not colouring or multiline; no need to search for further matches. */
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 7e963fb..e49c2b2 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -936,3 +936,12 @@ end
16:start end in between start
end
RC=0
+---------------------------- Test 125 -----------------------------
+abcd
+RC=0
+abcd
+RC=0
+abcd
+RC=0
+abcd
+RC=0
--
2.13.6