Prevent stack exhaustion when studying a pattern with nested groups by limiting the recursion to 1000 calls

This commit is contained in:
Petr Písař 2020-01-27 13:52:49 +01:00
parent 6b7a3ed56a
commit 6cc8347f99
2 changed files with 123 additions and 0 deletions

View File

@ -0,0 +1,117 @@
From b251f0bc17a4d5a3b3f7690432113c773bcbe13f Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 27 Jan 2020 10:28:19 +0000
Subject: [PATCH] Limit function recursion in pcre2_study to avoid stack
overflow issues.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1213 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Port to 10.34.
---
src/pcre2_study.c | 31 ++++++++++++++++++++++---------
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
index 2883868..5af01b5 100644
--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */
/* Returns from set_start_bits() */
-enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
/*************************************************
@@ -924,19 +924,24 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
must continue at the outer level to find at least one mandatory code unit. At
the outermost level, this function fails unless the result is SSB_DONE.
+We restrict recursion (for nested groups) to 1000 to avoid stack overflow
+issues.
+
Arguments:
re points to the compiled regex block
code points to an expression
utf TRUE if in UTF mode
+ depthptr pointer to recurse depth
Returns: SSB_FAIL => Failed to find any starting code units
SSB_DONE => Found mandatory starting code units
SSB_CONTINUE => Found optional starting code units
SSB_UNKNOWN => Hit an unrecognized opcode
+ SSB_TOODEEP => Recursion is too deep
*/
static int
-set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, int *depthptr)
{
uint32_t c;
int yield = SSB_DONE;
@@ -947,6 +952,9 @@ int table_limit = utf? 16:32;
int table_limit = 32;
#endif
+*depthptr += 1;
+if (*depthptr > 1000) return SSB_TOODEEP;
+
do
{
BOOL try_next = TRUE;
@@ -1103,13 +1111,17 @@ do
case OP_SCRIPT_RUN:
case OP_ASSERT:
case OP_ASSERT_NA:
- rc = set_start_bits(re, tcode, utf);
- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
- if (rc == SSB_DONE) try_next = FALSE; else
+ rc = set_start_bits(re, tcode, utf, depthptr);
+ if (rc == SSB_DONE)
+ {
+ try_next = FALSE;
+ }
+ else if (rc == SSB_CONTINUE)
{
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
}
+ else return rc; /* FAIL, UNKNOWN, or TOODEEP */
break;
/* If we hit ALT or KET, it means we haven't found anything mandatory in
@@ -1155,8 +1167,8 @@ do
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
- rc = set_start_bits(re, ++tcode, utf);
- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
+ rc = set_start_bits(re, ++tcode, utf, depthptr);
+ if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@@ -1664,7 +1676,8 @@ code units. */
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
- int rc = set_start_bits(re, code, utf);
+ int depth = 0;
+ int rc = set_start_bits(re, code, utf, &depth);
if (rc == SSB_UNKNOWN) return 1;
/* If a list of starting code units was set up, scan the list to see if only
--
2.21.1

View File

@ -68,6 +68,9 @@ Patch5: pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.pat
# Fix processing (?(DEFINE)...) within look-behind assertions, # Fix processing (?(DEFINE)...) within look-behind assertions,
# in upstream after 10.34 # in upstream after 10.34
Patch6: pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch Patch6: pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch
# Prevent stack exhaustion when studying a pattern with nested groups by
# limiting the recursion to 1000 calls, in upstream after 10.34
Patch7: pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: coreutils BuildRequires: coreutils
@ -152,6 +155,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch4 -p1 %patch4 -p1
%patch5 -p1 %patch5 -p1
%patch6 -p1 %patch6 -p1
%patch7 -p1
# Because of multilib patch # Because of multilib patch
libtoolize --copy --force libtoolize --copy --force
autoreconf -vif autoreconf -vif
@ -254,6 +258,8 @@ make %{?_smp_mflags} check VERBOSE=yes
- Ensure a newline after the final line in a file is output by pcre2grep - Ensure a newline after the final line in a file is output by pcre2grep
(upstream bug #2513) (upstream bug #2513)
- Fix processing (?(DEFINE)...) within look-behind assertions - Fix processing (?(DEFINE)...) within look-behind assertions
- Prevent stack exhaustion when studying a pattern with nested groups by
limiting the recursion to 1000 calls
* Mon Jan 13 2020 Petr Pisar <ppisar@redhat.com> - 10.34-4 * Mon Jan 13 2020 Petr Pisar <ppisar@redhat.com> - 10.34-4
- Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion - Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion