218 lines
6.1 KiB
Diff
218 lines
6.1 KiB
Diff
From 44c8382acfe0902b302e0d7a5b1c6d9ee9226a51 Mon Sep 17 00:00:00 2001
|
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
|
Date: Tue, 16 Jul 2019 15:06:21 +0000
|
|
Subject: [PATCH] Fix lookbehind within lookahead within lookbehind
|
|
misbehaviour bug.
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069
|
|
Petr Písař: Ported to 10.33.
|
|
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
src/pcre2_compile.c | 58 +++++++++++++++++++++++++++++---------------
|
|
testdata/testinput1 | 6 +++++
|
|
testdata/testinput2 | 3 +++
|
|
testdata/testoutput1 | 9 +++++++
|
|
testdata/testoutput2 | 4 +++
|
|
5 files changed, 61 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
|
index f6e0a0b..2ae95ed 100644
|
|
--- a/src/pcre2_compile.c
|
|
+++ b/src/pcre2_compile.c
|
|
@@ -135,6 +135,8 @@ static BOOL
|
|
set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *,
|
|
compile_block *);
|
|
|
|
+static int
|
|
+ check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
|
|
|
|
|
|
/*************************************************
|
|
@@ -8997,15 +8999,15 @@ for (;; pptr++)
|
|
}
|
|
break;
|
|
|
|
- /* Lookaheads can be ignored, but we must start the skip inside the group
|
|
- so that it isn't treated as a group within the branch. */
|
|
+ /* Lookaheads do not contribute to the length of this branch, but they may
|
|
+ contain lookbehinds within them whose lengths need to be set. */
|
|
|
|
case META_LOOKAHEAD:
|
|
case META_LOOKAHEADNOT:
|
|
- pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
|
- if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
|
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
|
|
+ if (*errcodeptr != 0) return -1;
|
|
|
|
- /* Also ignore any qualifiers that follow a lookahead assertion. */
|
|
+ /* Ignore any qualifiers that follow a lookahead assertion. */
|
|
|
|
switch (pptr[1])
|
|
{
|
|
@@ -9319,20 +9321,28 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
|
|
the error offset is marked unset. The enables the functions above not to
|
|
override settings from deeper nestings.
|
|
|
|
-Arguments cb points to the compile block
|
|
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
|
+This function is called recursively from get_branchlength() for lookaheads in
|
|
+order to process any lookbehinds that they may contain. It stops when it hits a
|
|
+non-nested closing parenthesis in this case, storing a pointer to it in *retptr.
|
|
+
|
|
+Arguments
|
|
+ pptr points to where to start (start of pattern or start of lookahead)
|
|
+ retptr if not NULL, return the ket pointer here
|
|
+ cb points to the compile block
|
|
+
|
|
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
|
*/
|
|
|
|
static int
|
|
-check_lookbehinds(compile_block *cb)
|
|
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
|
|
{
|
|
-uint32_t *pptr;
|
|
int errorcode = 0;
|
|
int loopcount = 0;
|
|
+int nestlevel = 0;
|
|
|
|
cb->erroroffset = PCRE2_UNSET;
|
|
|
|
-for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|
+for (; *pptr != META_END; pptr++)
|
|
{
|
|
if (*pptr < META_END) continue; /* Literal */
|
|
|
|
@@ -9346,14 +9356,30 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|
pptr += 1;
|
|
break;
|
|
|
|
+ case META_KET:
|
|
+ if (--nestlevel < 0)
|
|
+ {
|
|
+ if (retptr != NULL) *retptr = pptr;
|
|
+ return 0;
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case META_ATOMIC:
|
|
+ case META_CAPTURE:
|
|
+ case META_COND_ASSERT:
|
|
+ case META_LOOKAHEAD:
|
|
+ case META_LOOKAHEADNOT:
|
|
+ case META_NOCAPTURE:
|
|
+ case META_SCRIPT_RUN:
|
|
+ nestlevel++;
|
|
+ break;
|
|
+
|
|
case META_ACCEPT:
|
|
case META_ALT:
|
|
case META_ASTERISK:
|
|
case META_ASTERISK_PLUS:
|
|
case META_ASTERISK_QUERY:
|
|
- case META_ATOMIC:
|
|
case META_BACKREF:
|
|
- case META_CAPTURE:
|
|
case META_CIRCUMFLEX:
|
|
case META_CLASS:
|
|
case META_CLASS_EMPTY:
|
|
@@ -9361,14 +9387,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|
case META_CLASS_END:
|
|
case META_CLASS_NOT:
|
|
case META_COMMIT:
|
|
- case META_COND_ASSERT:
|
|
case META_DOLLAR:
|
|
case META_DOT:
|
|
case META_FAIL:
|
|
- case META_KET:
|
|
- case META_LOOKAHEAD:
|
|
- case META_LOOKAHEADNOT:
|
|
- case META_NOCAPTURE:
|
|
case META_PLUS:
|
|
case META_PLUS_PLUS:
|
|
case META_PLUS_QUERY:
|
|
@@ -9378,7 +9399,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|
case META_QUERY_QUERY:
|
|
case META_RANGE_ESCAPED:
|
|
case META_RANGE_LITERAL:
|
|
- case META_SCRIPT_RUN:
|
|
case META_SKIP:
|
|
case META_THEN:
|
|
break;
|
|
@@ -9879,7 +9899,7 @@ lengths. */
|
|
|
|
if (has_lookbehind)
|
|
{
|
|
- errorcode = check_lookbehinds(&cb);
|
|
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
|
|
if (errorcode != 0) goto HAD_CB_ERROR;
|
|
}
|
|
|
|
diff --git a/testdata/testinput1 b/testdata/testinput1
|
|
index 4d9ec5a..ee9354b 100644
|
|
--- a/testdata/testinput1
|
|
+++ b/testdata/testinput1
|
|
@@ -6365,4 +6365,10 @@ ef) x/x,mark
|
|
/(?(DEFINE)(a|ab))(?1){1}+c/
|
|
abc
|
|
|
|
+/(?<=(?=.(?<=x)))/aftertext
|
|
+ abx
|
|
+
|
|
+/(?<=(?=(?<=a)))b/
|
|
+ ab
|
|
+
|
|
# End of testinput1
|
|
diff --git a/testdata/testinput2 b/testdata/testinput2
|
|
index 9412bf6..d85fc5f 100644
|
|
--- a/testdata/testinput2
|
|
+++ b/testdata/testinput2
|
|
@@ -5597,4 +5597,7 @@ a)"xI
|
|
# Multiplication overflow
|
|
/(X{65535})(?<=\1{32770})/
|
|
|
|
+/(?<=(?=.(?<=x)))/
|
|
+ ab\=ph
|
|
+
|
|
# End of testinput2
|
|
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
|
index fffb8ec..c9bfea8 100644
|
|
--- a/testdata/testoutput1
|
|
+++ b/testdata/testoutput1
|
|
@@ -10081,4 +10081,13 @@ No match
|
|
abc
|
|
No match
|
|
|
|
+/(?<=(?=.(?<=x)))/aftertext
|
|
+ abx
|
|
+ 0:
|
|
+ 0+ x
|
|
+
|
|
+/(?<=(?=(?<=a)))b/
|
|
+ ab
|
|
+ 0: b
|
|
+
|
|
# End of testinput1
|
|
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
|
index 950095f..6405e26 100644
|
|
--- a/testdata/testoutput2
|
|
+++ b/testdata/testoutput2
|
|
@@ -16948,6 +16948,10 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
|
|
/(X{65535})(?<=\1{32770})/
|
|
Failed: error 187 at offset 10: lookbehind assertion is too long
|
|
|
|
+/(?<=(?=.(?<=x)))/
|
|
+ ab\=ph
|
|
+No match
|
|
+
|
|
# End of testinput2
|
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
|
Error -62: bad serialized data
|
|
--
|
|
2.20.1
|
|
|