Fix DFA matching a lookbehind assertion that has a zero-length branch

This commit is contained in:
Petr Písař 2017-06-16 09:51:51 +02:00
parent adc9e95e1d
commit c2d3ffd720
2 changed files with 164 additions and 1 deletions

View File

@ -0,0 +1,155 @@
From 0dd869d72a472eca78ab323fd7318b3fae00593b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 27 May 2017 15:49:29 +0000
Subject: [PATCH] Fix lookbehind with zero-length branch in DFA matching. Fixes
oss-fuzz 1959.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Petr Písař: Ported to 10.23:
commit 25926aac413c2b67359bc03c2a11a86a7a0718da
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat May 27 15:49:29 2017 +0000
Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@807 6239d852-aaf2-0410-a92c-79f79f948069
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_dfa_match.c | 30 +++++++++++-------------------
testdata/testinput6 | 6 ++++++
testdata/testoutput6 | 10 ++++++++++
3 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 519a147..7f0f9bf 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -375,14 +375,10 @@ internal_dfa_match(
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
-
const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr;
PCRE2_SPTR end_code;
-PCRE2_SPTR first_op;
-
dfa_recursion_info new_recursive;
-
int active_count, new_count, match_count;
/* Some fields in the mb block are frequently referenced, so we load them into
@@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2);
next_new_state = new_states = active_states + wscount;
new_count = 0;
-first_op = this_start_code + 1 + LINK_SIZE +
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
- ? IMM2_SIZE:0);
-
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
the alternative states onto the list, and find out where the end is. This
makes is possible to use this function recursively, when we want to stop at a
matching internal ket rather than at the end.
-If the first opcode in the first alternative is OP_REVERSE, we are dealing with
-a backward assertion. In that case, we have to find out the maximum amount to
-move back, and set up each alternative appropriately. */
+If we are dealing with a backward assertion we have to find out the maximum
+amount to move back, and set up each alternative appropriately. */
-if (*first_op == OP_REVERSE)
+if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
{
size_t max_back = 0;
size_t gone_back;
@@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE)
if (current_subject < mb->start_used_ptr)
mb->start_used_ptr = current_subject;
- /* Now we can process the individual branches. */
+ /* Now we can process the individual branches. There will be an OP_REVERSE at
+ the start of each branch, except when the length of the branch is zero. */
end_code = this_start_code;
do
{
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+ uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
+ size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
if (back <= gone_back)
{
- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
+ int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
}
end_code += GET(end_code, 1);
@@ -544,7 +536,7 @@ for (;;)
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
-
+
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
/* Make the new state list into the active state list and empty the
@@ -597,7 +589,7 @@ for (;;)
int state_offset = current_state->offset;
int rrc;
int count;
-
+
/* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have
been skipped". If the could_continue flag was passed over from a previous
@@ -633,7 +625,7 @@ for (;;)
code = start_code + state_offset;
codevalue = *code;
-
+
/* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */
diff --git a/testdata/testinput6 b/testdata/testinput6
index be9b767..9fb96d2 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -4889,4 +4889,10 @@
/(02-)?[0-9]{3}-[0-9]{3}/
02-123-123
+/(?<=abc|)/
+ abcde\=aftertext
+
+/(?<=|abc)/
+ abcde\=aftertext
+
# End of testinput6
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 2930acc..f75e731 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -7691,4 +7691,14 @@ Failed: error -53: recursion limit exceeded
02-123-123
0: 02-123-123
+/(?<=abc|)/
+ abcde\=aftertext
+ 0:
+ 0+ abcde
+
+/(?<=|abc)/
+ abcde\=aftertext
+ 0:
+ 0+ abcde
+
# End of testinput6
--
2.9.4

View File

@ -2,7 +2,7 @@
#%%global rcversion RC1 #%%global rcversion RC1
Name: pcre2 Name: pcre2
Version: 10.23 Version: 10.23
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion} %global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library Summary: Perl-compatible regular expression library
Group: System Environment/Libraries Group: System Environment/Libraries
@ -81,6 +81,9 @@ Patch13: pcre2-10.23-Correct-an-incorrect-cast.patch
# Fix a pcre2test crash on multiple push statements, upstream bug #2109, # Fix a pcre2test crash on multiple push statements, upstream bug #2109,
# in upstream after 10.23 # in upstream after 10.23
Patch14: pcre2-10.23-Fix-crash-when-more-than-one-kind-of-push-was-set-in.patch Patch14: pcre2-10.23-Fix-crash-when-more-than-one-kind-of-push-was-set-in.patch
# Fix DFA matching a lookbehind assertion that has a zero-length branch,
# PCRE2 oss-fuzz issue 1959, in upstream after 10.23
Patch15: pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: coreutils BuildRequires: coreutils
@ -171,6 +174,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch12 -p1 %patch12 -p1
%patch13 -p1 %patch13 -p1
%patch14 -p1 %patch14 -p1
%patch15 -p1
# Because of multilib patch # Because of multilib patch
libtoolize --copy --force libtoolize --copy --force
autoreconf -vif autoreconf -vif
@ -269,6 +273,10 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.* %{_mandir}/man1/pcre2test.*
%changelog %changelog
* Fri Jun 16 2017 Petr Pisar <ppisar@redhat.com> - 10.23-8
- Fix DFA matching a lookbehind assertion that has a zero-length branch
(PCRE2 oss-fuzz issue 1959)
* Tue May 09 2017 Petr Pisar <ppisar@redhat.com> - 10.23-7 * Tue May 09 2017 Petr Pisar <ppisar@redhat.com> - 10.23-7
- Fix a pcre2test crash on multiple push statements (upstream bug #2109) - Fix a pcre2test crash on multiple push statements (upstream bug #2109)