From e2eeaf85f1b5d6c4669b621d309ff904cbf96f4b Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 5 Nov 2014 15:08:03 +0000 Subject: [PATCH] Fix bug when there are unset groups prior to (*ACCEPT) within a capturing group. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1510 2f5784b3-3f2a-0410-8824-cb99058d5e15 Signed-off-by: Petr Písař --- ChangeLog | 12 ++++++++++++ pcre_exec.c | 13 ++++++++++++- testdata/testinput1 | 3 +++ testdata/testoutput1 | 9 +++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 8abdfb5..06da1c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,18 @@ ChangeLog for PCRE ------------------ +Version 8.37 xx-xxx-201x +------------------------ + +1. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges + for those parentheses to be closed with whatever has been captured so far. + However, it was failing to mark any other groups between the highest + capture so far and the current group as "unset". Thus, the ovector for + those groups contained whatever was previously there. An example is the + pattern /(x)|((*ACCEPT))/ when matched against "abcd". + + + Version 8.36 26-September-2014 ------------------------------ diff --git a/pcre_exec.c b/pcre_exec.c index 654eb9e..fdf7067 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -1474,7 +1474,18 @@ for (;;) md->offset_vector[offset] = md->offset_vector[md->offset_end - number]; md->offset_vector[offset+1] = (int)(eptr - md->start_subject); - if (offset_top <= offset) offset_top = offset + 2; + + /* If this group is at or above the current highwater mark, ensure that + any groups between the current high water mark and this group are marked + unset and then update the high water mark. 
*/ + + if (offset >= offset_top) + { + register int *iptr = md->offset_vector + offset_top; + register int *iend = md->offset_vector + offset; + while (iptr < iend) *iptr++ = -1; + offset_top = offset + 2; + } } ecode += 1 + IMM2_SIZE; break; diff --git a/testdata/testinput1 b/testdata/testinput1 index 123e3d3..091e307 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5720,4 +5720,7 @@ AbcdCBefgBhiBqz /[\Q]a\E]+/ aa]] +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + /-- End of testinput1 --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 5e71900..ba7ca37 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9411,4 +9411,13 @@ No match aa]] 0: aa]] +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + 0: + 1: + 2: + 3: + 4: + 5: + /-- End of testinput1 --/ -- 1.9.3