From 4c3e518bff94e5f206a63e3a1e5d7e570402786b Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 3 Aug 2019 08:30:40 +0000 Subject: [PATCH] Fix incorrect computation of group length when one branch exceeded 65535. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1155 6239d852-aaf2-0410-a92c-79f79f948069 Petr Písař: Ported to 10.33. Signed-off-by: Petr Písař --- src/pcre2_study.c | 18 ++++++++++++------ testdata/testinput2 | 8 ++++++++ testdata/testoutput2 | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/pcre2_study.c b/src/pcre2_study.c index e883c2e..cb5e7f1 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -103,6 +103,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code, int *backref_cache) { int length = -1; +int branchlength = 0; int prev_cap_recno = -1; int prev_cap_d = 0; int prev_recurse_recno = -1; @@ -110,9 +111,9 @@ int prev_recurse_d = 0; uint32_t once_fudge = 0; BOOL had_recurse = FALSE; BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; -recurse_check this_recurse; -int branchlength = 0; +PCRE2_SPTR nextbranch = code + GET(code, 1); PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; +recurse_check this_recurse; /* If this is a "could be empty" group, its minimum length is 0. */ @@ -128,16 +129,20 @@ if ((*countptr)++ > 1000) return -1; /* Scan along the opcodes for this branch. If we get to the end of the branch, check the length against that of the other branches. If the accumulated length -passes 16-bits, stop. */ +passes 16-bits, reset to that value and skip the rest of the branch. */ for (;;) { int d, min, recno; - PCRE2_UCHAR *cs, *ce; - PCRE2_UCHAR op = *cc; + PCRE2_UCHAR op, *cs, *ce; - if (branchlength >= UINT16_MAX) return UINT16_MAX; + if (branchlength >= UINT16_MAX) + { + branchlength = UINT16_MAX; + cc = (PCRE2_UCHAR *)nextbranch; + } + op = *cc; switch (op) { case OP_COND: @@ -227,6 +232,7 @@ for (;;) if (length < 0 || (!had_recurse && branchlength < length)) length = branchlength; if (op != OP_ALT) return length; + nextbranch = cc + GET(cc, 1); cc += 1 + LINK_SIZE; branchlength = 0; had_recurse = FALSE; diff --git a/testdata/testinput2 b/testdata/testinput2 index 1bfe591..384239a 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5603,4 +5603,12 @@ a)"xI # Expect error (recursion => not fixed length) /(\2)((?=(?<=\1)))/ +/\A\s*(a|(?:[^`]{28500}){4})/I + a + +/\A\s*((?:[^`]{28500}){4})/I + +/\A\s*((?:[^`]{28500}){4}|a)/I + a + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 758b4db..0983741 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16956,6 +16956,33 @@ No match /(\2)((?=(?<=\1)))/ Failed: error 125 at offset 8: lookbehind assertion is not fixed length +/\A\s*(a|(?:[^`]{28500}){4})/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + +/\A\s*((?:[^`]{28500}){4})/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 65535 + +/\A\s*((?:[^`]{28500}){4}|a)/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data -- 2.20.1