From 0dd869d72a472eca78ab323fd7318b3fae00593b Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 27 May 2017 15:49:29 +0000 Subject: [PATCH] Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Petr Písař: Ported to 10.23: commit 25926aac413c2b67359bc03c2a11a86a7a0718da Author: ph10 Date: Sat May 27 15:49:29 2017 +0000 Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@807 6239d852-aaf2-0410-a92c-79f79f948069 Signed-off-by: Petr Písař --- src/pcre2_dfa_match.c | 30 +++++++++++------------------- testdata/testinput6 | 6 ++++++ testdata/testoutput6 | 10 ++++++++++ 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 519a147..7f0f9bf 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -375,14 +375,10 @@ internal_dfa_match( { stateblock *active_states, *new_states, *temp_states; stateblock *next_active_state, *next_new_state; - const uint8_t *ctypes, *lcc, *fcc; PCRE2_SPTR ptr; PCRE2_SPTR end_code; -PCRE2_SPTR first_op; - dfa_recursion_info new_recursive; - int active_count, new_count, match_count; /* Some fields in the mb block are frequently referenced, so we load them into @@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2); next_new_state = new_states = active_states + wscount; new_count = 0; -first_op = this_start_code + 1 + LINK_SIZE + - ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) - ? IMM2_SIZE:0); - /* The first thing in any (sub) pattern is a bracket of some sort. Push all the alternative states onto the list, and find out where the end is. This makes is possible to use this function recursively, when we want to stop at a matching internal ket rather than at the end. -If the first opcode in the first alternative is OP_REVERSE, we are dealing with -a backward assertion. In that case, we have to find out the maximum amount to -move back, and set up each alternative appropriately. */ +If we are dealing with a backward assertion we have to find out the maximum +amount to move back, and set up each alternative appropriately. */ -if (*first_op == OP_REVERSE) +if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) { size_t max_back = 0; size_t gone_back; @@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE) if (current_subject < mb->start_used_ptr) mb->start_used_ptr = current_subject; - /* Now we can process the individual branches. */ + /* Now we can process the individual branches. There will be an OP_REVERSE at + the start of each branch, except when the length of the branch is zero. */ end_code = this_start_code; do { - size_t back = (size_t)GET(end_code, 2+LINK_SIZE); + uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0; + size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE); if (back <= gone_back) { - int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); + int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen); ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back)); } end_code += GET(end_code, 1); @@ -544,7 +536,7 @@ for (;;) BOOL partial_newline = FALSE; BOOL could_continue = reset_could_continue; reset_could_continue = FALSE; - + if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; /* Make the new state list into the active state list and empty the @@ -597,7 +589,7 @@ for (;;) int state_offset = current_state->offset; int rrc; int count; - + /* A negative offset is a special case meaning "hold off going to this (negated) state until the number of characters in the data field have been skipped". If the could_continue flag was passed over from a previous @@ -633,7 +625,7 @@ for (;;) code = start_code + state_offset; codevalue = *code; - + /* If this opcode inspects a character, but we are at the end of the subject, remember the fact for use when testing for a partial match. */ diff --git a/testdata/testinput6 b/testdata/testinput6 index be9b767..9fb96d2 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4889,4 +4889,10 @@ /(02-)?[0-9]{3}-[0-9]{3}/ 02-123-123 +/(?<=abc|)/ + abcde\=aftertext + +/(?<=|abc)/ + abcde\=aftertext + # End of testinput6 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 2930acc..f75e731 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7691,4 +7691,14 @@ Failed: error -53: recursion limit exceeded 02-123-123 0: 02-123-123 +/(?<=abc|)/ + abcde\=aftertext + 0: + 0+ abcde + +/(?<=|abc)/ + abcde\=aftertext + 0: + 0+ abcde + # End of testinput6 -- 2.9.4