From 6cc8347f99a8bca3a37028c56e8554651b0dcd1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Mon, 27 Jan 2020 13:52:49 +0100 Subject: [PATCH] Prevent from a stack exhaustion when studying a pattern for nested groups by putting a limit of 1000 recursive calls --- ...ecursion-in-pcre2_study-to-avoid-sta.patch | 117 ++++++++++++++++++ pcre2.spec | 6 + 2 files changed, 123 insertions(+) create mode 100644 pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch diff --git a/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch b/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch new file mode 100644 index 0000000..38513d4 --- /dev/null +++ b/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch @@ -0,0 +1,117 @@ +From b251f0bc17a4d5a3b3f7690432113c773bcbe13f Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Mon, 27 Jan 2020 10:28:19 +0000 +Subject: [PATCH] Limit function recursion in pcre2_study to avoid stack + overflow issues. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1213 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Port to 10.34. +--- + src/pcre2_study.c | 31 ++++++++++++++++++++++--------- + +diff --git a/src/pcre2_study.c b/src/pcre2_study.c +index 2883868..5af01b5 100644 +--- a/src/pcre2_study.c ++++ b/src/pcre2_study.c +@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge +- New API code Copyright (c) 2016-2019 University of Cambridge ++ New API code Copyright (c) 2016-2020 University of Cambridge + + ----------------------------------------------------------------------------- + Redistribution and use in source and binary forms, with or without +@@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). 
*/ + + /* Returns from set_start_bits() */ + +-enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; ++enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP }; + + + /************************************************* +@@ -924,19 +924,24 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as + must continue at the outer level to find at least one mandatory code unit. At + the outermost level, this function fails unless the result is SSB_DONE. + ++We restrict recursion (for nested groups) to 1000 to avoid stack overflow ++issues. ++ + Arguments: + re points to the compiled regex block + code points to an expression + utf TRUE if in UTF mode ++ depthptr pointer to recurse depth + + Returns: SSB_FAIL => Failed to find any starting code units + SSB_DONE => Found mandatory starting code units + SSB_CONTINUE => Found optional starting code units + SSB_UNKNOWN => Hit an unrecognized opcode ++ SSB_TOODEEP => Recursion is too deep + */ + + static int +-set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf) ++set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, int *depthptr) + { + uint32_t c; + int yield = SSB_DONE; +@@ -947,6 +952,9 @@ int table_limit = utf? 
16:32; + int table_limit = 32; + #endif + ++*depthptr += 1; ++if (*depthptr > 1000) return SSB_TOODEEP; ++ + do + { + BOOL try_next = TRUE; +@@ -1103,13 +1111,17 @@ do + case OP_SCRIPT_RUN: + case OP_ASSERT: + case OP_ASSERT_NA: +- rc = set_start_bits(re, tcode, utf); +- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; +- if (rc == SSB_DONE) try_next = FALSE; else ++ rc = set_start_bits(re, tcode, utf, depthptr); ++ if (rc == SSB_DONE) ++ { ++ try_next = FALSE; ++ } ++ else if (rc == SSB_CONTINUE) + { + do tcode += GET(tcode, 1); while (*tcode == OP_ALT); + tcode += 1 + LINK_SIZE; + } ++ else return rc; /* FAIL, UNKNOWN, or TOODEEP */ + break; + + /* If we hit ALT or KET, it means we haven't found anything mandatory in +@@ -1155,8 +1167,8 @@ do + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: +- rc = set_start_bits(re, ++tcode, utf); +- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; ++ rc = set_start_bits(re, ++tcode, utf, depthptr); ++ if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc; + do tcode += GET(tcode,1); while (*tcode == OP_ALT); + tcode += 1 + LINK_SIZE; + break; +@@ -1664,7 +1676,8 @@ code units. */ + + if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) + { +- int rc = set_start_bits(re, code, utf); ++ int depth = 0; ++ int rc = set_start_bits(re, code, utf, &depth); + if (rc == SSB_UNKNOWN) return 1; + + /* If a list of starting code units was set up, scan the list to see if only +-- +2.21.1 + diff --git a/pcre2.spec b/pcre2.spec index 951981d..ad93384 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -68,6 +68,9 @@ Patch5: pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.pat # Fix processing (?(DEFINE)...) 
within look-behind assertions, # in upstream after 10.34 Patch6: pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch +# Prevent stack exhaustion when studying a pattern with nested groups by +# imposing a limit of 1000 recursive calls, in upstream after 10.34 +Patch7: pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -152,6 +155,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch4 -p1 %patch5 -p1 %patch6 -p1 +%patch7 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -254,6 +258,8 @@ make %{?_smp_mflags} check VERBOSE=yes - Ensure a newline after the final line in a file is output by pcre2grep (upstream bug #2513) - Fix processing (?(DEFINE)...) within look-behind assertions +- Prevent stack exhaustion when studying a pattern with nested groups by + imposing a limit of 1000 recursive calls * Mon Jan 13 2020 Petr Pisar - 10.34-4 - Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion