From a690f6bf85225c34d3e336664d2437c16f7a7238 Mon Sep 17 00:00:00 2001 From: ph10 Date: Tue, 7 Apr 2015 16:19:03 +0000 Subject: [PATCH] Fix pcretest loop for \K in lookbehind assertion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit ported to 8.36: commit c3579a7581cb8b3ca3c9617d63083afea29de646 Author: ph10 Date: Tue Apr 7 16:19:03 2015 +0000 Fix pcretest loop for \K in lookbehind assertion. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1544 2f5784b3-3f2a-0410-8824-cb99058d5e15 Signed-off-by: Petr Písař --- pcretest.c | 30 +++++++++++++++++++++++++++--- testdata/testinput2 | 6 ++++++ testdata/testinput5 | 6 ++++++ testdata/testoutput2 | 28 ++++++++++++++++++++++++++++ testdata/testoutput5 | 28 ++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 3 deletions(-) diff --git a/pcretest.c b/pcretest.c index b8dc3c6..6e6ef48 100644 --- a/pcretest.c +++ b/pcretest.c @@ -5618,9 +5618,33 @@ while (!done) g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; } - /* For /g, update the start offset, leaving the rest alone */ - - if (do_g) start_offset = use_offsets[1]; + /* For /g, update the start offset, leaving the rest alone. There is a + tricky case when \K is used in a positive lookbehind assertion. This can + cause the end of the match to be less than or equal to the start offset. + In this case we restart at one past the start offset. This may return the + same match if the original start offset was bumped along during the + match, but eventually the new start offset will hit the actual start + offset. (In PCRE2 the true start offset is available, and this can be + done better. It is not worth doing more than making sure we do not loop + at this stage in the life of PCRE1.) */ + + if (do_g) + { + if (g_notempty == 0 && use_offsets[1] <= start_offset) + { + if (start_offset >= len) break; /* End of subject */ + start_offset++; + if (use_utf) + { + while (start_offset < len) + { + if ((bptr[start_offset] & 0xc0) != 0x80) break; + start_offset++; + } + } + } + else start_offset = use_offsets[1]; + } /* For /G, update the pointer and length */ diff --git a/testdata/testinput2 b/testdata/testinput2 index 0fc535a..f8fbced 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4088,4 +4088,10 @@ backtracking verbs. --/ "(?<=((?2))((?1)))" +/(?<=\Ka)/g+ + aaaaa + +/(?<=\Ka)/G+ + aaaaa + /-- End of testinput2 --/ diff --git a/testdata/testinput5 b/testdata/testinput5 index e36b09d..fe7218d 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -790,4 +790,10 @@ /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/8BZ +/(?<=\K\x{17f})/8g+ + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + +/(?<=\K\x{17f})/8G+ + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + /-- End of testinput5 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 45662a9..5233de4 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14221,4 +14221,32 @@ No match "(?<=((?2))((?1)))" Failed: lookbehind assertion is not fixed length at offset 17 +/(?<=\Ka)/g+ + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + +/(?<=\Ka)/G+ + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + /-- End of testinput2 --/ diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 5c098e6..b338e23 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -1897,4 +1897,32 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 5 End ------------------------------------------------------------------ +/(?<=\K\x{17f})/8g+ + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f} + 0: \x{17f} + 0+ + +/(?<=\K\x{17f})/8G+ + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f} + 0: \x{17f} + 0+ + /-- End of testinput5 --/ -- 2.1.0