From 2b029aba91d42edb9dd958306a7909e2bb459b01 Mon Sep 17 00:00:00 2001 From: ph10 Date: Tue, 1 Nov 2016 15:58:28 +0000 Subject: [PATCH] Fix auto-anchor bug when .* is inside an assertion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported to 10.22: commit 6fba816130cccd2158dc9a6d30b03bb2bb31ef8c Author: ph10 Date: Tue Nov 1 15:58:28 2016 +0000 Fix auto-anchor bug when .* is inside an assertion. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@587 6239d852-aaf2-0410-a92c- 79f79f948069 Signed-off-by: Petr Písař --- src/pcre2_compile.c | 33 +++++++++++++++++++++++---------- testdata/testinput1 | 3 +++ testdata/testoutput1 | 4 ++++ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index fe37310..b9b9361 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -7960,13 +7960,14 @@ Arguments: the less precise approach cb points to the compile data block atomcount atomic group level + inassert TRUE if in an assertion Returns: TRUE or FALSE */ static BOOL is_anchored(register PCRE2_SPTR code, unsigned int bracket_map, - compile_block *cb, int atomcount) + compile_block *cb, int atomcount, BOOL inassert) { do { PCRE2_SPTR scode = first_significant_code( @@ -7978,7 +7979,8 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; } /* Capturing brackets */ @@ -7988,33 +7990,44 @@ do { { int n = GET2(scode, 1+LINK_SIZE); int new_map = bracket_map | ((n < 32)? (1u << n) : 1); - if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE; + if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; } - /* Positive forward assertions and conditions */ + /* Positive forward assertion */ - else if (op == OP_ASSERT || op == OP_COND) + else if (op == OP_ASSERT) { - if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; + if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + } + + /* Condition */ + + else if (op == OP_COND) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; } /* Atomic groups */ else if (op == OP_ONCE || op == OP_ONCE_NC) { - if (!is_anchored(scode, bracket_map, cb, atomcount + 1)) + if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) return FALSE; } /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and it isn't in brackets that are or may be referenced or inside an atomic - group. There is also an option that disables auto-anchoring. */ + group or an assertion. Also the pattern must not contain *PRUNE or *SKIP, + because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/ + with the subject "aab", which matches "b", i.e. not at the start of a line. + There is also an option that disables auto-anchoring. */ else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)) { if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || + atomcount > 0 || cb->had_pruneorskip || inassert || (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) return FALSE; } @@ -8984,7 +8997,7 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to disable this case). */ if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_anchored(codestart, 0, &cb, 0)) + is_anchored(codestart, 0, &cb, 0, FALSE)) re->overall_options |= PCRE2_ANCHORED; /* If the pattern is still not anchored and we do not have a first code unit, diff --git a/testdata/testinput1 b/testdata/testinput1 index 0d680d3..2b4ec2c 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5798,4 +5798,7 @@ name)/mark /(?=.*X)X$/ \ X +/(?s)(?=.*?)b/ + aabc + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 02e07bf..774a5ec 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9265,4 +9265,8 @@ No match \ X 0: X +/(?s)(?=.*?)b/ + aabc + 0: b + # End of testinput1 -- 2.7.4