From 1860c03fdead52378512df6a9175f48a9d45f2f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Mon, 14 Jul 2014 14:19:45 +0200 Subject: [PATCH] Add pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needed for commit 5d1fed8df94a0db02c12ce61c0c2ddc8c97e64d3 Author: Petr Písař Date: Mon Jul 14 13:15:27 2014 +0200 Fix empty-matching possessive zero-repeat groups in interpreted mode --- ...ng-possessive-zero-repeat-groups-bug.patch | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch diff --git a/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch new file mode 100644 index 0000000..2d82134 --- /dev/null +++ b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch @@ -0,0 +1,192 @@ +From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 27 May 2014 13:18:31 +0000 +Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15 +Signed-off-by: Petr Písař + +Petr Pisar: Ported to 8.35. + +Signed-off-by: Petr Písař +--- + pcre_exec.c | 43 +++++++++++++++++++++++++++---------------- + testdata/testinput1 | 9 +++++++++ + testdata/testinput8 | 6 ++++++ + testdata/testoutput1 | 12 ++++++++++++ + testdata/testoutput8 | 8 ++++++++ + 5 files changed, 62 insertions(+), 16 deletions(-) + +diff --git a/pcre_exec.c b/pcre_exec.c +index 5dec992..5a8dbad 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -1167,11 +1167,16 @@ for (;;) + if (rrc == MATCH_KETRPOS) + { + offset_top = md->end_offset_top; +- eptr = md->end_match_ptr; + ecode = md->start_code + code_offset; + save_capture_last = md->capture_last; + matched_once = TRUE; + mstart = md->start_match_ptr; /* In case \K changed it */ ++ if (eptr == md->end_match_ptr) /* Matched an empty string */ ++ { ++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); ++ break; ++ } ++ eptr = md->end_match_ptr; + continue; + } + +@@ -1241,10 +1246,15 @@ for (;;) + if (rrc == MATCH_KETRPOS) + { + offset_top = md->end_offset_top; +- eptr = md->end_match_ptr; + ecode = md->start_code + code_offset; + matched_once = TRUE; + mstart = md->start_match_ptr; /* In case \K reset it */ ++ if (eptr == md->end_match_ptr) /* Matched an empty string */ ++ { ++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); ++ break; ++ } ++ eptr = md->end_match_ptr; + continue; + } + +@@ -1894,7 +1904,7 @@ for (;;) + case OP_KETRMAX: + case OP_KETRPOS: + prev = ecode - GET(ecode, 1); +- ++ + /* If this was a group that remembered the subject start, in order to break + infinite repeats of empty string matches, retrieve the subject start from + the chain. Otherwise, set it NULL. */ +@@ -1919,7 +1929,7 @@ for (;;) + md->start_match_ptr = mstart; + RRETURN(MATCH_MATCH); /* Sets md->mark */ + } +- ++ + /* For capturing groups we have to check the group number back at the start + and if necessary complete handling an extraction by setting the offsets and + bumping the high water mark. Whole-pattern recursion is coded as a recurse +@@ -1979,6 +1989,19 @@ for (;;) + } + } + ++ /* OP_KETRPOS is a possessive repeating ket. Remember the current position, ++ and return the MATCH_KETRPOS. This makes it possible to do the repeats one ++ at a time from the outer level, thus saving stack. This must precede the ++ empty string test - in this case that test is done at the outer level. */ ++ ++ if (*ecode == OP_KETRPOS) ++ { ++ md->start_match_ptr = mstart; /* In case \K reset it */ ++ md->end_match_ptr = eptr; ++ md->end_offset_top = offset_top; ++ RRETURN(MATCH_KETRPOS); ++ } ++ + /* For an ordinary non-repeating ket, just continue at this level. This + also happens for a repeating ket if no characters were matched in the + group. This is the forcible breaking of infinite loops as implemented in +@@ -2001,18 +2024,6 @@ for (;;) + break; + } + +- /* OP_KETRPOS is a possessive repeating ket. Remember the current position, +- and return the MATCH_KETRPOS. This makes it possible to do the repeats one +- at a time from the outer level, thus saving stack. */ +- +- if (*ecode == OP_KETRPOS) +- { +- md->start_match_ptr = mstart; /* In case \K reset it */ +- md->end_match_ptr = eptr; +- md->end_offset_top = offset_top; +- RRETURN(MATCH_KETRPOS); +- } +- + /* The normal repeating kets try the rest of the pattern or restart from + the preceding bracket, in the appropriate order. In the second case, we can + use tail recursion to avoid using another stack frame, unless we have an +diff --git a/testdata/testinput1 b/testdata/testinput1 +index f933692..ffb9455 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz + /[\Q]a\E]+/ + aa]] + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ + /-- End of testinput1 --/ +diff --git a/testdata/testinput8 b/testdata/testinput8 +index bb2747b..06334cd 100644 +--- a/testdata/testinput8 ++++ b/testdata/testinput8 +@@ -4831,4 +4831,10 @@ + /[ab]{2,}?/ + aaaa + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ + /-- End of testinput8 --/ +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 3d9a328..b2ae430 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -9325,4 +9325,16 @@ No match + aa]] + 0: aa]] + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ + /-- End of testinput1 --/ +diff --git a/testdata/testoutput8 b/testdata/testoutput8 +index 3861ea4..95c4e4d 100644 +--- a/testdata/testoutput8 ++++ b/testdata/testoutput8 +@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches + 1: aaa + 2: aa + ++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ ++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' ++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED ++ 0: NON QUOTED "QUOT""ED" AFTER ++ + /-- End of testinput8 --/ +-- +1.9.3 +