Revert a fix for a mismatch with a lookbehind within a lookahead within a lookbehind

This commit is contained in:
Petr Písař 2019-08-26 08:47:59 +02:00
parent b23673474f
commit 304ae352fc
5 changed files with 78 additions and 351 deletions

View File

@ -1,114 +0,0 @@
From 007b635b6788f8317747842b02f9c85137277c20 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Thu, 18 Jul 2019 17:20:29 +0000
Subject: [PATCH] Fix bug in recent patch for lookbehinds within lookaheads.
Fixes ClusterFuzz 15933.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1138 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 22 +++++++++++++---------
testdata/testinput2 | 3 +++
testdata/testoutput2 | 4 ++++
3 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 2ae95ed..b68c154 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -136,7 +136,8 @@ static BOOL
compile_block *);
static int
- check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
+ check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
+ compile_block *);
/*************************************************
@@ -9004,7 +9005,7 @@ for (;; pptr++)
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
- *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
if (*errcodeptr != 0) return -1;
/* Ignore any qualifiers that follow a lookahead assertion. */
@@ -9326,15 +9327,17 @@ order to process any lookbehinds that they may contain. It stops when it hits a
non-nested closing parenthesis in this case, returning a pointer to it.
Arguments
- pptr points to where to start (start of pattern or start of lookahead)
- retptr if not NULL, return the ket pointer here
- cb points to the compile block
+ pptr points to where to start (start of pattern or start of lookahead)
+ retptr if not NULL, return the ket pointer here
+ recurses chain of recurse_check to catch mutual recursion
+ cb points to the compile block
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/
static int
-check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
+ parsed_recurse_check *recurses, compile_block *cb)
{
int errorcode = 0;
int loopcount = 0;
@@ -9449,7 +9452,8 @@ for (; *pptr != META_END; pptr++)
case META_LOOKBEHIND:
case META_LOOKBEHINDNOT:
- if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb))
+ if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount,
+ recurses, cb))
return errorcode;
break;
}
@@ -9899,7 +9903,7 @@ lengths. */
if (has_lookbehind)
{
- errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
if (errorcode != 0) goto HAD_CB_ERROR;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index d85fc5f..1bfe591 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5600,4 +5600,7 @@ a)"xI
/(?<=(?=.(?<=x)))/
ab\=ph
+# Expect error (recursion => not fixed length)
+/(\2)((?=(?<=\1)))/
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 6405e26..758b4db 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16952,6 +16952,10 @@ Failed: error 187 at offset 10: lookbehind assertion is too long
ab\=ph
No match
+# Expect error (recursion => not fixed length)
+/(\2)((?=(?<=\1)))/
+Failed: error 125 at offset 8: lookbehind assertion is not fixed length
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,4 +1,4 @@
From 4c3e518bff94e5f206a63e3a1e5d7e570402786b Mon Sep 17 00:00:00 2001
From f966564e6e8fa85db153b827bf7e01879b8ee42b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 3 Aug 2019 08:30:40 +0000
Subject: [PATCH] Fix incorrect computation of group length when one branch
@ -75,12 +75,12 @@ index e883c2e..cb5e7f1 100644
branchlength = 0;
had_recurse = FALSE;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 1bfe591..384239a 100644
index 9412bf6..b9d2826 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5603,4 +5603,12 @@ a)"xI
# Expect error (recursion => not fixed length)
/(\2)((?=(?<=\1)))/
@@ -5597,4 +5597,12 @@ a)"xI
# Multiplication overflow
/(X{65535})(?<=\1{32770})/
+/\A\s*(a|(?:[^`]{28500}){4})/I
+ a
@ -92,12 +92,12 @@ index 1bfe591..384239a 100644
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 758b4db..0983741 100644
index 950095f..16c8bd6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16956,6 +16956,33 @@ No match
/(\2)((?=(?<=\1)))/
Failed: error 125 at offset 8: lookbehind assertion is not fixed length
@@ -16948,6 +16948,33 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
/(X{65535})(?<=\1{32770})/
Failed: error 187 at offset 10: lookbehind assertion is too long
+/\A\s*(a|(?:[^`]{28500}){4})/I
+Capture group count = 1
@ -130,5 +130,5 @@ index 758b4db..0983741 100644
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1
2.21.0

View File

@ -1,217 +0,0 @@
From 44c8382acfe0902b302e0d7a5b1c6d9ee9226a51 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 16 Jul 2019 15:06:21 +0000
Subject: [PATCH] Fix lookbehind within lookahead within lookbehind
misbehaviour bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 58 +++++++++++++++++++++++++++++---------------
testdata/testinput1 | 6 +++++
testdata/testinput2 | 3 +++
testdata/testoutput1 | 9 +++++++
testdata/testoutput2 | 4 +++
5 files changed, 61 insertions(+), 19 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index f6e0a0b..2ae95ed 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -135,6 +135,8 @@ static BOOL
set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *,
compile_block *);
+static int
+ check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
/*************************************************
@@ -8997,15 +8999,15 @@ for (;; pptr++)
}
break;
- /* Lookaheads can be ignored, but we must start the skip inside the group
- so that it isn't treated as a group within the branch. */
+ /* Lookaheads do not contribute to the length of this branch, but they may
+ contain lookbehinds within them whose lengths need to be set. */
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
- pptr = parsed_skip(pptr + 1, PSKIP_KET);
- if (pptr == NULL) goto PARSED_SKIP_FAILED;
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
+ if (*errcodeptr != 0) return -1;
- /* Also ignore any qualifiers that follow a lookahead assertion. */
+ /* Ignore any qualifiers that follow a lookahead assertion. */
switch (pptr[1])
{
@@ -9319,20 +9321,28 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
the error offset is marked unset. The enables the functions above not to
override settings from deeper nestings.
-Arguments cb points to the compile block
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
+This function is called recursively from get_branchlength() for lookaheads in
+order to process any lookbehinds that they may contain. It stops when it hits a
+non-nested closing parenthesis in this case, returning a pointer to it.
+
+Arguments
+ pptr points to where to start (start of pattern or start of lookahead)
+ retptr if not NULL, return the ket pointer here
+ cb points to the compile block
+
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/
static int
-check_lookbehinds(compile_block *cb)
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
{
-uint32_t *pptr;
int errorcode = 0;
int loopcount = 0;
+int nestlevel = 0;
cb->erroroffset = PCRE2_UNSET;
-for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
+for (; *pptr != META_END; pptr++)
{
if (*pptr < META_END) continue; /* Literal */
@@ -9346,14 +9356,30 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
pptr += 1;
break;
+ case META_KET:
+ if (--nestlevel < 0)
+ {
+ if (retptr != NULL) *retptr = pptr;
+ return 0;
+ }
+ break;
+
+ case META_ATOMIC:
+ case META_CAPTURE:
+ case META_COND_ASSERT:
+ case META_LOOKAHEAD:
+ case META_LOOKAHEADNOT:
+ case META_NOCAPTURE:
+ case META_SCRIPT_RUN:
+ nestlevel++;
+ break;
+
case META_ACCEPT:
case META_ALT:
case META_ASTERISK:
case META_ASTERISK_PLUS:
case META_ASTERISK_QUERY:
- case META_ATOMIC:
case META_BACKREF:
- case META_CAPTURE:
case META_CIRCUMFLEX:
case META_CLASS:
case META_CLASS_EMPTY:
@@ -9361,14 +9387,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_CLASS_END:
case META_CLASS_NOT:
case META_COMMIT:
- case META_COND_ASSERT:
case META_DOLLAR:
case META_DOT:
case META_FAIL:
- case META_KET:
- case META_LOOKAHEAD:
- case META_LOOKAHEADNOT:
- case META_NOCAPTURE:
case META_PLUS:
case META_PLUS_PLUS:
case META_PLUS_QUERY:
@@ -9378,7 +9399,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_QUERY_QUERY:
case META_RANGE_ESCAPED:
case META_RANGE_LITERAL:
- case META_SCRIPT_RUN:
case META_SKIP:
case META_THEN:
break;
@@ -9879,7 +9899,7 @@ lengths. */
if (has_lookbehind)
{
- errorcode = check_lookbehinds(&cb);
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
if (errorcode != 0) goto HAD_CB_ERROR;
}
diff --git a/testdata/testinput1 b/testdata/testinput1
index 4d9ec5a..ee9354b 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6365,4 +6365,10 @@ ef) x/x,mark
/(?(DEFINE)(a|ab))(?1){1}+c/
abc
+/(?<=(?=.(?<=x)))/aftertext
+ abx
+
+/(?<=(?=(?<=a)))b/
+ ab
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9412bf6..d85fc5f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5597,4 +5597,7 @@ a)"xI
# Multiplication overflow
/(X{65535})(?<=\1{32770})/
+/(?<=(?=.(?<=x)))/
+ ab\=ph
+
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index fffb8ec..c9bfea8 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10081,4 +10081,13 @@ No match
abc
No match
+/(?<=(?=.(?<=x)))/aftertext
+ abx
+ 0:
+ 0+ x
+
+/(?<=(?=(?<=a)))b/
+ ab
+ 0: b
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 950095f..6405e26 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16948,6 +16948,10 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
/(X{65535})(?<=\1{32770})/
Failed: error 187 at offset 10: lookbehind assertion is too long
+/(?<=(?=.(?<=x)))/
+ ab\=ph
+No match
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -0,0 +1,58 @@
From 134f44d6debd4ccf5510488f78bfc0f7fe850eff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Mon, 26 Aug 2019 08:42:47 +0200
Subject: [PATCH] Test a regression in a lookbehind after a condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The regression is caused by this upstream commit:
commit d70442f2c962ca816b8d03504a07e618c271a9fe
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue Jul 16 15:06:21 2019 +0000
Fix lookbehind within lookahead within lookbehind misbehaviour bug.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069
https://bugs.exim.org/show_bug.cgi?id=2433
https://bugzilla.redhat.com/show_bug.cgi?id=1743863
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
testdata/testinput2 | 3 +++
testdata/testoutput2 | 6 ++++++
2 files changed, 9 insertions(+)
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9ac8646..2dc99da 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5605,4 +5605,7 @@ a)"xI
/\A\s*((?:[^`]{28500}){4}|a)/I
a
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
+abc
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index bcc1aae..3875e1e 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16975,6 +16975,12 @@ Subject length lower bound = 1
0: a
1: a
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
+abc
+ 0: abc
+ 1: a
+ 2: c
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.21.0

View File

@ -9,7 +9,7 @@
#%%global rcversion RC1
Name: pcre2
Version: 10.33
Release: %{?rcversion:0.}11%{?rcversion:.%rcversion}%{?dist}
Release: %{?rcversion:0.}12%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
# the library: BSD with exceptions
@ -79,17 +79,14 @@ Patch9: pcre2-10.33-Check-for-integer-overflow-when-computing-lookbehind.pat
# 2/2 Fix an integer overflow when checking a lookbehind length,
# in upstream after 10.33
Patch10: pcre2-10.33-Additional-overflow-test.patch
# 1/2 Fix a mismatch with a lookbehind within a lookahead within a lookbehind,
# upstream bug #2412, in upstream after 10.33
Patch11: pcre2-10.33-Fix-lookbehind-within-lookahead-within-lookbehind-mi.patch
# 2/2 Fix a mismatch with a lookbehind within a lookahead within a lookbehind,
# upstream bug #2412, in upstream after 10.33
Patch12: pcre2-10.33-Fix-bug-in-recent-patch-for-lookbehinds-within-looka.patch
# Fix an incorrect computation of a group length when a branch exceeds 65535,
# upstream bug #2428, in upstream after 10.33
Patch13: pcre2-10.33-Fix-incorrect-computation-of-group-length-when-one-b.patch
Patch11: pcre2-10.33-Fix-incorrect-computation-of-group-length-when-one-b.patch
# Fix reporting rightmost consulted characters, in upstream after 10.33
Patch14: pcre2-10.33-Fix-allusedtext-bug-rightmost-consulted-character-in.patch
Patch12: pcre2-10.33-Fix-allusedtext-bug-rightmost-consulted-character-in.patch
# Test a regression in a lookbehind after a condition, bug #1743863,
# upstream bug #2433, not in the upstream
Patch13: pcre2-10.33-Test-a-regression-in-a-lookbehind-after-a-condition.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: coreutils
@ -181,7 +178,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
# Because of multilib patch
libtoolize --copy --force
autoreconf -vif
@ -280,6 +276,10 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.*
%changelog
* Mon Aug 26 2019 Petr Pisar <ppisar@redhat.com> - 10.33-12
- Revert a fix for a mismatch with a lookbehind within a lookahead within
a lookbehind (bug #1743863)
* Mon Aug 12 2019 Petr Pisar <ppisar@redhat.com> - 10.33-11
- Fix reporting rightmost consulted characters