From c2d3ffd7201c377fa3ee548386ddfc0a7055dac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Fri, 16 Jun 2017 09:51:51 +0200 Subject: [PATCH] Fix DFA matching a lookbehind assertion that has a zero-length branch --- ...ith-zero-length-branch-in-DFA-matchi.patch | 155 ++++++++++++++++++ pcre2.spec | 10 +- 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch diff --git a/pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch b/pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch new file mode 100644 index 0000000..2c1d77c --- /dev/null +++ b/pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch @@ -0,0 +1,155 @@ +From 0dd869d72a472eca78ab323fd7318b3fae00593b Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 27 May 2017 15:49:29 +0000 +Subject: [PATCH] Fix lookbehind with zero-length branch in DFA matching. Fixes + oss-fuzz 1959. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Písař: Ported to 10.23: + +commit 25926aac413c2b67359bc03c2a11a86a7a0718da +Author: ph10 +Date: Sat May 27 15:49:29 2017 +0000 + + Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959. + + git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@807 6239d852-aaf2-0410-a92c-79f79f948069 + +Signed-off-by: Petr Písař +--- + src/pcre2_dfa_match.c | 30 +++++++++++------------------- + testdata/testinput6 | 6 ++++++ + testdata/testoutput6 | 10 ++++++++++ + 3 files changed, 27 insertions(+), 19 deletions(-) + +diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c +index 519a147..7f0f9bf 100644 +--- a/src/pcre2_dfa_match.c ++++ b/src/pcre2_dfa_match.c +@@ -375,14 +375,10 @@ internal_dfa_match( + { + stateblock *active_states, *new_states, *temp_states; + stateblock *next_active_state, *next_new_state; +- + const uint8_t *ctypes, *lcc, *fcc; + PCRE2_SPTR ptr; + PCRE2_SPTR end_code; +-PCRE2_SPTR first_op; +- + dfa_recursion_info new_recursive; +- + int active_count, new_count, match_count; + + /* Some fields in the mb block are frequently referenced, so we load them into +@@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2); + next_new_state = new_states = active_states + wscount; + new_count = 0; + +-first_op = this_start_code + 1 + LINK_SIZE + +- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || +- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) +- ? IMM2_SIZE:0); +- + /* The first thing in any (sub) pattern is a bracket of some sort. Push all + the alternative states onto the list, and find out where the end is. This + makes is possible to use this function recursively, when we want to stop at a + matching internal ket rather than at the end. + +-If the first opcode in the first alternative is OP_REVERSE, we are dealing with +-a backward assertion. In that case, we have to find out the maximum amount to +-move back, and set up each alternative appropriately. */ ++If we are dealing with a backward assertion we have to find out the maximum ++amount to move back, and set up each alternative appropriately. */ + +-if (*first_op == OP_REVERSE) ++if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) + { + size_t max_back = 0; + size_t gone_back; +@@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE) + if (current_subject < mb->start_used_ptr) + mb->start_used_ptr = current_subject; + +- /* Now we can process the individual branches. */ ++ /* Now we can process the individual branches. There will be an OP_REVERSE at ++ the start of each branch, except when the length of the branch is zero. */ + + end_code = this_start_code; + do + { +- size_t back = (size_t)GET(end_code, 2+LINK_SIZE); ++ uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0; ++ size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE); + if (back <= gone_back) + { +- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); ++ int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen); + ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back)); + } + end_code += GET(end_code, 1); +@@ -544,7 +536,7 @@ for (;;) + BOOL partial_newline = FALSE; + BOOL could_continue = reset_could_continue; + reset_could_continue = FALSE; +- ++ + if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; + + /* Make the new state list into the active state list and empty the +@@ -597,7 +589,7 @@ for (;;) + int state_offset = current_state->offset; + int rrc; + int count; +- ++ + /* A negative offset is a special case meaning "hold off going to this + (negated) state until the number of characters in the data field have + been skipped". If the could_continue flag was passed over from a previous +@@ -633,7 +625,7 @@ for (;;) + + code = start_code + state_offset; + codevalue = *code; +- ++ + /* If this opcode inspects a character, but we are at the end of the + subject, remember the fact for use when testing for a partial match. */ + +diff --git a/testdata/testinput6 b/testdata/testinput6 +index be9b767..9fb96d2 100644 +--- a/testdata/testinput6 ++++ b/testdata/testinput6 +@@ -4889,4 +4889,10 @@ + /(02-)?[0-9]{3}-[0-9]{3}/ + 02-123-123 + ++/(?<=abc|)/ ++ abcde\=aftertext ++ ++/(?<=|abc)/ ++ abcde\=aftertext ++ + # End of testinput6 +diff --git a/testdata/testoutput6 b/testdata/testoutput6 +index 2930acc..f75e731 100644 +--- a/testdata/testoutput6 ++++ b/testdata/testoutput6 +@@ -7691,4 +7691,14 @@ Failed: error -53: recursion limit exceeded + 02-123-123 + 0: 02-123-123 + ++/(?<=abc|)/ ++ abcde\=aftertext ++ 0: ++ 0+ abcde ++ ++/(?<=|abc)/ ++ abcde\=aftertext ++ 0: ++ 0+ abcde ++ + # End of testinput6 +-- +2.9.4 + diff --git a/pcre2.spec b/pcre2.spec index 4e09644..175e080 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -2,7 +2,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.23 -Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -81,6 +81,9 @@ Patch13: pcre2-10.23-Correct-an-incorrect-cast.patch # Fix a pcre2test crash on multiple push statements, upstream bug #2109, # in upstream after 10.23 Patch14: pcre2-10.23-Fix-crash-when-more-than-one-kind-of-push-was-set-in.patch +# Fix DFA matching a lookbehind assertion that has a zero-length branch, +# PCRE2 oss-fuzz issue 1859, in upstream after 10.23 +Patch15: pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -171,6 +174,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch12 -p1 %patch13 -p1 %patch14 -p1 +%patch15 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -269,6 +273,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Fri Jun 16 2017 Petr Pisar - 10.23-8 +- Fix DFA matching a lookbehind assertion that has a zero-length branch + (PCRE2 oss-fuzz issue 1859) + * Tue May 09 2017 Petr Pisar - 10.23-7 - Fix a pcre2test crash on multiple push statements (upstream bug #2109)