Fix DFA matching a lookbehind assertion that has a zero-length branch
This commit is contained in:
parent
adc9e95e1d
commit
c2d3ffd720
@ -0,0 +1,155 @@
|
||||
From 0dd869d72a472eca78ab323fd7318b3fae00593b Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sat, 27 May 2017 15:49:29 +0000
|
||||
Subject: [PATCH] Fix lookbehind with zero-length branch in DFA matching. Fixes
|
||||
oss-fuzz 1959.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Petr Písař: Ported to 10.23:
|
||||
|
||||
commit 25926aac413c2b67359bc03c2a11a86a7a0718da
|
||||
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sat May 27 15:49:29 2017 +0000
|
||||
|
||||
Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959.
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@807 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_dfa_match.c | 30 +++++++++++-------------------
|
||||
testdata/testinput6 | 6 ++++++
|
||||
testdata/testoutput6 | 10 ++++++++++
|
||||
3 files changed, 27 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
|
||||
index 519a147..7f0f9bf 100644
|
||||
--- a/src/pcre2_dfa_match.c
|
||||
+++ b/src/pcre2_dfa_match.c
|
||||
@@ -375,14 +375,10 @@ internal_dfa_match(
|
||||
{
|
||||
stateblock *active_states, *new_states, *temp_states;
|
||||
stateblock *next_active_state, *next_new_state;
|
||||
-
|
||||
const uint8_t *ctypes, *lcc, *fcc;
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR end_code;
|
||||
-PCRE2_SPTR first_op;
|
||||
-
|
||||
dfa_recursion_info new_recursive;
|
||||
-
|
||||
int active_count, new_count, match_count;
|
||||
|
||||
/* Some fields in the mb block are frequently referenced, so we load them into
|
||||
@@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2);
|
||||
next_new_state = new_states = active_states + wscount;
|
||||
new_count = 0;
|
||||
|
||||
-first_op = this_start_code + 1 + LINK_SIZE +
|
||||
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
|
||||
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
|
||||
- ? IMM2_SIZE:0);
|
||||
-
|
||||
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
|
||||
the alternative states onto the list, and find out where the end is. This
|
||||
makes is possible to use this function recursively, when we want to stop at a
|
||||
matching internal ket rather than at the end.
|
||||
|
||||
-If the first opcode in the first alternative is OP_REVERSE, we are dealing with
|
||||
-a backward assertion. In that case, we have to find out the maximum amount to
|
||||
-move back, and set up each alternative appropriately. */
|
||||
+If we are dealing with a backward assertion we have to find out the maximum
|
||||
+amount to move back, and set up each alternative appropriately. */
|
||||
|
||||
-if (*first_op == OP_REVERSE)
|
||||
+if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
|
||||
{
|
||||
size_t max_back = 0;
|
||||
size_t gone_back;
|
||||
@@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE)
|
||||
if (current_subject < mb->start_used_ptr)
|
||||
mb->start_used_ptr = current_subject;
|
||||
|
||||
- /* Now we can process the individual branches. */
|
||||
+ /* Now we can process the individual branches. There will be an OP_REVERSE at
|
||||
+ the start of each branch, except when the length of the branch is zero. */
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
|
||||
+ uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
|
||||
+ size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
|
||||
if (back <= gone_back)
|
||||
{
|
||||
- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
|
||||
+ int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
|
||||
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
|
||||
}
|
||||
end_code += GET(end_code, 1);
|
||||
@@ -544,7 +536,7 @@ for (;;)
|
||||
BOOL partial_newline = FALSE;
|
||||
BOOL could_continue = reset_could_continue;
|
||||
reset_could_continue = FALSE;
|
||||
-
|
||||
+
|
||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
@@ -597,7 +589,7 @@ for (;;)
|
||||
int state_offset = current_state->offset;
|
||||
int rrc;
|
||||
int count;
|
||||
-
|
||||
+
|
||||
/* A negative offset is a special case meaning "hold off going to this
|
||||
(negated) state until the number of characters in the data field have
|
||||
been skipped". If the could_continue flag was passed over from a previous
|
||||
@@ -633,7 +625,7 @@ for (;;)
|
||||
|
||||
code = start_code + state_offset;
|
||||
codevalue = *code;
|
||||
-
|
||||
+
|
||||
/* If this opcode inspects a character, but we are at the end of the
|
||||
subject, remember the fact for use when testing for a partial match. */
|
||||
|
||||
diff --git a/testdata/testinput6 b/testdata/testinput6
|
||||
index be9b767..9fb96d2 100644
|
||||
--- a/testdata/testinput6
|
||||
+++ b/testdata/testinput6
|
||||
@@ -4889,4 +4889,10 @@
|
||||
/(02-)?[0-9]{3}-[0-9]{3}/
|
||||
02-123-123
|
||||
|
||||
+/(?<=abc|)/
|
||||
+ abcde\=aftertext
|
||||
+
|
||||
+/(?<=|abc)/
|
||||
+ abcde\=aftertext
|
||||
+
|
||||
# End of testinput6
|
||||
diff --git a/testdata/testoutput6 b/testdata/testoutput6
|
||||
index 2930acc..f75e731 100644
|
||||
--- a/testdata/testoutput6
|
||||
+++ b/testdata/testoutput6
|
||||
@@ -7691,4 +7691,14 @@ Failed: error -53: recursion limit exceeded
|
||||
02-123-123
|
||||
0: 02-123-123
|
||||
|
||||
+/(?<=abc|)/
|
||||
+ abcde\=aftertext
|
||||
+ 0:
|
||||
+ 0+ abcde
|
||||
+
|
||||
+/(?<=|abc)/
|
||||
+ abcde\=aftertext
|
||||
+ 0:
|
||||
+ 0+ abcde
|
||||
+
|
||||
# End of testinput6
|
||||
--
|
||||
2.9.4
|
||||
|
10
pcre2.spec
10
pcre2.spec
@ -2,7 +2,7 @@
|
||||
#%%global rcversion RC1
|
||||
Name: pcre2
|
||||
Version: 10.23
|
||||
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist}
|
||||
Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist}
|
||||
%global myversion %{version}%{?rcversion:-%rcversion}
|
||||
Summary: Perl-compatible regular expression library
|
||||
Group: System Environment/Libraries
|
||||
@ -81,6 +81,9 @@ Patch13: pcre2-10.23-Correct-an-incorrect-cast.patch
|
||||
# Fix a pcre2test crash on multiple push statements, upstream bug #2109,
|
||||
# in upstream after 10.23
|
||||
Patch14: pcre2-10.23-Fix-crash-when-more-than-one-kind-of-push-was-set-in.patch
|
||||
# Fix DFA matching a lookbehind assertion that has a zero-length branch,
|
||||
# PCRE2 oss-fuzz issue 1859, in upstream after 10.23
|
||||
Patch15: pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: automake
|
||||
BuildRequires: coreutils
|
||||
@ -171,6 +174,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
|
||||
%patch12 -p1
|
||||
%patch13 -p1
|
||||
%patch14 -p1
|
||||
%patch15 -p1
|
||||
# Because of multilib patch
|
||||
libtoolize --copy --force
|
||||
autoreconf -vif
|
||||
@ -269,6 +273,10 @@ make %{?_smp_mflags} check VERBOSE=yes
|
||||
%{_mandir}/man1/pcre2test.*
|
||||
|
||||
%changelog
|
||||
* Fri Jun 16 2017 Petr Pisar <ppisar@redhat.com> - 10.23-8
|
||||
- Fix DFA matching a lookbehind assertion that has a zero-length branch
|
||||
(PCRE2 oss-fuzz issue 1859)
|
||||
|
||||
* Tue May 09 2017 Petr Pisar <ppisar@redhat.com> - 10.23-7
|
||||
- Fix a pcre2test crash on multiple push statements (upstream bug #2109)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user