From a08fa6fd157fd0d61da7f20f07b939fbc302c2c6 Mon Sep 17 00:00:00 2001 From: Hugo van der Sanden Date: Wed, 5 Oct 2016 12:56:05 +0100 Subject: [PATCH] [perl #129377] don't read past start of string for unmatched backref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported to 5.24.1: commit 2dfc11ec3af312f4fa3eb244077c79dbb5fc2d85 Author: Hugo van der Sanden Date: Wed Oct 5 12:56:05 2016 +0100 [perl #129377] don't read past start of string for unmatched backref We can have (start, end) == (0, -1) for an unmatched backref, we must check for that. Signed-off-by: Petr Písař --- regexec.c | 10 ++++++---- t/re/pat.t | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/regexec.c b/regexec.c index a5d5db4..a7bc0c3 100644 --- a/regexec.c +++ b/regexec.c @@ -5179,6 +5179,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) regnode *next; U32 n = 0; /* general value; init to avoid compiler warning */ SSize_t ln = 0; /* len or last; init to avoid compiler warning */ + SSize_t endref = 0; /* offset of end of backref when ln is start */ char *locinput = startpos; char *pushinput; /* where to continue after a PUSH */ I32 nextchr; /* is always set to UCHARAT(locinput), or -1 at EOS */ @@ -6489,10 +6490,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) do_nref_ref_common: ln = rex->offs[n].start; + endref = rex->offs[n].end; reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */ - if (rex->lastparen < n || ln == -1) + if (rex->lastparen < n || ln == -1 || endref == -1) sayNO; /* Do not match unless seen CLOSEn. */ - if (ln == rex->offs[n].end) + if (ln == endref) break; s = reginfo->strbeg + ln; @@ -6506,7 +6508,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) * not going off the end given by reginfo->strend, and * returns in upon success, how much of the * current input was matched */ - if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target, + if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target, locinput, &limit, 0, utf8_target, utf8_fold_flags)) { sayNO; @@ -6521,7 +6523,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) (type == REF || UCHARAT(s) != fold_array[nextchr])) sayNO; - ln = rex->offs[n].end - ln; + ln = endref - ln; if (locinput + ln > reginfo->strend) sayNO; if (ln > 1 && (type == REF diff --git a/t/re/pat.t b/t/re/pat.t index 4aa77cf..749edd0 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { skip_all_without_unicode_tables(); } -plan tests => 791; # Update this when adding/deleting tests. +plan tests => 792; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1765,6 +1765,20 @@ EOP utf8::upgrade($str); ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - utf8 case" ); } + { + # [perl #129377] backref to an unmatched capture should not cause + # reading before start of string. + SKIP: { + skip "no re-debug under miniperl" if is_miniperl; + my $prog = <<'EOP'; +use re qw(Debug EXECUTE); +"x" =~ m{ () y | () \1 }x; +EOP + fresh_perl_like($prog, qr{ + \A (?! .* ^ \s+ - ) + }msx, { stderr => 1 }, "Offsets in debug output are not negative"); + } + } } # End of sub run_tests 1; -- 2.7.4