Needed for
commit 5d1fed8df9
Author: Petr Písař <ppisar@redhat.com>
Date: Mon Jul 14 13:15:27 2014 +0200
Fix empty-matching possessive zero-repeat groups in interpreted mode
193 lines
6.3 KiB
Diff
From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001
|
|
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
|
Date: Tue, 27 May 2014 13:18:31 +0000
|
|
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
|
|
Petr Pisar: Ported to 8.35.
|
|
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
pcre_exec.c | 43 +++++++++++++++++++++++++++----------------
|
|
testdata/testinput1 | 9 +++++++++
|
|
testdata/testinput8 | 6 ++++++
|
|
testdata/testoutput1 | 12 ++++++++++++
|
|
testdata/testoutput8 | 8 ++++++++
|
|
5 files changed, 62 insertions(+), 16 deletions(-)
|
|
|
|
diff --git a/pcre_exec.c b/pcre_exec.c
|
|
index 5dec992..5a8dbad 100644
|
|
--- a/pcre_exec.c
|
|
+++ b/pcre_exec.c
|
|
@@ -1167,11 +1167,16 @@ for (;;)
|
|
if (rrc == MATCH_KETRPOS)
|
|
{
|
|
offset_top = md->end_offset_top;
|
|
- eptr = md->end_match_ptr;
|
|
ecode = md->start_code + code_offset;
|
|
save_capture_last = md->capture_last;
|
|
matched_once = TRUE;
|
|
mstart = md->start_match_ptr; /* In case \K changed it */
|
|
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
|
|
+ {
|
|
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
|
+ break;
|
|
+ }
|
|
+ eptr = md->end_match_ptr;
|
|
continue;
|
|
}
|
|
|
|
@@ -1241,10 +1246,15 @@ for (;;)
|
|
if (rrc == MATCH_KETRPOS)
|
|
{
|
|
offset_top = md->end_offset_top;
|
|
- eptr = md->end_match_ptr;
|
|
ecode = md->start_code + code_offset;
|
|
matched_once = TRUE;
|
|
mstart = md->start_match_ptr; /* In case \K reset it */
|
|
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
|
|
+ {
|
|
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
|
+ break;
|
|
+ }
|
|
+ eptr = md->end_match_ptr;
|
|
continue;
|
|
}
|
|
|
|
@@ -1894,7 +1904,7 @@ for (;;)
|
|
case OP_KETRMAX:
|
|
case OP_KETRPOS:
|
|
prev = ecode - GET(ecode, 1);
|
|
-
|
|
+
|
|
/* If this was a group that remembered the subject start, in order to break
|
|
infinite repeats of empty string matches, retrieve the subject start from
|
|
the chain. Otherwise, set it NULL. */
|
|
@@ -1919,7 +1929,7 @@ for (;;)
|
|
md->start_match_ptr = mstart;
|
|
RRETURN(MATCH_MATCH); /* Sets md->mark */
|
|
}
|
|
-
|
|
+
|
|
/* For capturing groups we have to check the group number back at the start
|
|
and if necessary complete handling an extraction by setting the offsets and
|
|
bumping the high water mark. Whole-pattern recursion is coded as a recurse
|
|
@@ -1979,6 +1989,19 @@ for (;;)
|
|
}
|
|
}
|
|
|
|
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
|
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
|
+ at a time from the outer level, thus saving stack. This must precede the
|
|
+ empty string test - in this case that test is done at the outer level. */
|
|
+
|
|
+ if (*ecode == OP_KETRPOS)
|
|
+ {
|
|
+ md->start_match_ptr = mstart; /* In case \K reset it */
|
|
+ md->end_match_ptr = eptr;
|
|
+ md->end_offset_top = offset_top;
|
|
+ RRETURN(MATCH_KETRPOS);
|
|
+ }
|
|
+
|
|
/* For an ordinary non-repeating ket, just continue at this level. This
|
|
also happens for a repeating ket if no characters were matched in the
|
|
group. This is the forcible breaking of infinite loops as implemented in
|
|
@@ -2001,18 +2024,6 @@ for (;;)
|
|
break;
|
|
}
|
|
|
|
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
|
|
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
|
|
- at a time from the outer level, thus saving stack. */
|
|
-
|
|
- if (*ecode == OP_KETRPOS)
|
|
- {
|
|
- md->start_match_ptr = mstart; /* In case \K reset it */
|
|
- md->end_match_ptr = eptr;
|
|
- md->end_offset_top = offset_top;
|
|
- RRETURN(MATCH_KETRPOS);
|
|
- }
|
|
-
|
|
/* The normal repeating kets try the rest of the pattern or restart from
|
|
the preceding bracket, in the appropriate order. In the second case, we can
|
|
use tail recursion to avoid using another stack frame, unless we have an
|
|
diff --git a/testdata/testinput1 b/testdata/testinput1
|
|
index f933692..ffb9455 100644
|
|
--- a/testdata/testinput1
|
|
+++ b/testdata/testinput1
|
|
@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz
|
|
/[\Q]a\E]+/
|
|
aa]]
|
|
|
|
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+
|
|
/-- End of testinput1 --/
|
|
diff --git a/testdata/testinput8 b/testdata/testinput8
|
|
index bb2747b..06334cd 100644
|
|
--- a/testdata/testinput8
|
|
+++ b/testdata/testinput8
|
|
@@ -4831,4 +4831,10 @@
|
|
/[ab]{2,}?/
|
|
aaaa
|
|
|
|
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+
|
|
/-- End of testinput8 --/
|
|
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
|
index 3d9a328..b2ae430 100644
|
|
--- a/testdata/testoutput1
|
|
+++ b/testdata/testoutput1
|
|
@@ -9325,4 +9325,16 @@ No match
|
|
aa]]
|
|
0: aa]]
|
|
|
|
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
+
|
|
/-- End of testinput1 --/
|
|
diff --git a/testdata/testoutput8 b/testdata/testoutput8
|
|
index 3861ea4..95c4e4d 100644
|
|
--- a/testdata/testoutput8
|
|
+++ b/testdata/testoutput8
|
|
@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches
|
|
1: aaa
|
|
2: aa
|
|
|
|
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
+
|
|
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
|
|
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
|
|
+ 0: NON QUOTED "QUOT""ED" AFTER
|
|
+
|
|
/-- End of testinput8 --/
|
|
--
|
|
1.9.3
|
|
|