From f3704e62341b10824f503aa0c8029670d101a434 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 11 Feb 2017 11:53:41 +0000 Subject: [PATCH] fix pad/scope issue in re_evals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported to 5.24.1: commit 4b9c7caeaecf4e9df0be3a2e296644f763f775d6 Author: David Mitchell Date: Sat Feb 11 11:53:41 2017 +0000 fix pad/scope issue in re_evals RT #129881 heap-buffer-overflow Perl_pad_sv In some circumstances involving a pattern which has embedded code blocks from more than one source, e.g. my $r = qr{(?{1;}){2}X}; "" =~ /$r|(?{1;})/; the wrong PL_comppad could be active while doing a LEAVE_SCOPE() or on exit from the pattern. This was mainly due to the big context stack changes in 5.24.0 - in particular, since POP_MULTICALL() now does CX_LEAVE_SCOPE(cx) *before* restoring PL_comppad, the (correct) unwinding of any SAVECOMPPAD's was being followed by Cblk_sub.prevcomppad>, which wasn't necessarily a sensible value. To fix this, record the value of PL_savestack_ix at entry to S_regmatch(), and set the cx->blk_oldsaveix of the MULTICALL to this value when pushed. On exit from S_regmatch, we either POP_MULTICALL which will do a LEAVE_SCOPE(cx->blk_oldsaveix), or in the absense of any EVAL, do the explicit but equivalent LEAVE_SCOPE(orig_savestack_ix). Note that this is a change in behaviour to S_regmatch() - formerly it wouldn't necessarily clear the savestack completely back the point of entry - that would get left to do by its caller, S_regtry(), or indirectly by Perl_regexec_flags(). This shouldn't make any practical difference, but is tidier and less likely to introduce bugs later. Signed-off-by: Petr Písař --- regexec.c | 69 +++++++++++++++++++++++++++++++++++++++++++----------- t/re/pat_re_eval.t | 20 +++++++++++++++- 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/regexec.c b/regexec.c index a7bc0c3..5656cdd 100644 --- a/regexec.c +++ b/regexec.c @@ -5233,6 +5233,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) _char_class_number classnum; bool is_utf8_pat = reginfo->is_utf8_pat; bool match = FALSE; + I32 orig_savestack_ix = PL_savestack_ix; /* Solaris Studio 12.3 messes up fetching PL_charclass['\n'] */ #if (defined(__SUNPRO_C) && (__SUNPRO_C == 0x5120) && defined(__x86_64) && defined(USE_64_BIT_ALL)) @@ -6646,30 +6647,67 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) nop = (OP*)rexi->data->data[n]; } - /* normally if we're about to execute code from the same - * CV that we used previously, we just use the existing - * CX stack entry. However, its possible that in the - * meantime we may have backtracked, popped from the save - * stack, and undone the SAVECOMPPAD(s) associated with - * PUSH_MULTICALL; in which case PL_comppad no longer - * points to newcv's pad. */ + /* Some notes about MULTICALL and the context and save stacks. + * + * In something like + * /...(?{ my $x)}...(?{ my $z)}...(?{ my $z)}.../ + * since codeblocks don't introduce a new scope (so that + * local() etc accumulate), at the end of a successful + * match there will be a SAVEt_CLEARSV on the savestack + * for each of $x, $y, $z. If the three code blocks above + * happen to have come from different CVs (e.g. via + * embedded qr//s), then we must ensure that during any + * savestack unwinding, PL_comppad always points to the + * right pad at each moment. We achieve this by + * interleaving SAVEt_COMPPAD's on the savestack whenever + * there is a change of pad. + * In theory whenever we call a code block, we should + * push a CXt_SUB context, then pop it on return from + * that code block. This causes a bit of an issue in that + * normally popping a context also clears the savestack + * back to cx->blk_oldsaveix, but here we specifically + * don't want to clear the save stack on exit from the + * code block. + * Also for efficiency we don't want to keep pushing and + * popping the single SUB context as we backtrack etc. + * So instead, we push a single context the first time + * we need, it, then hang onto it until the end of this + * function. Whenever we encounter a new code block, we + * update the CV etc if that's changed. During the times + * in this function where we're not executing a code + * block, having the SUB context still there is a bit + * naughty - but we hope that no-one notices. + * When the SUB context is initially pushed, we fake up + * cx->blk_oldsaveix to be as if we'd pushed this context + * on first entry to S_regmatch rather than at some random + * point during the regexe execution. That way if we + * croak, popping the context stack will ensure that + * *everything* SAVEd by this function is undone and then + * the context popped, rather than e.g., popping the + * context (and restoring the original PL_comppad) then + * popping more of the savestack and restoiring a bad + * PL_comppad. + */ + + /* If this is the first EVAL, push a MULTICALL. On + * subsequent calls, if we're executing a different CV, or + * if PL_comppad has got messed up from backtracking + * through SAVECOMPPADs, then refresh the context. + */ if (newcv != last_pushed_cv || PL_comppad != last_pad) { U8 flags = (CXp_SUB_RE | ((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0)); + SAVECOMPPAD(); if (last_pushed_cv) { - /* PUSH/POP_MULTICALL save and restore the - * caller's PL_comppad; if we call multiple subs - * using the same CX block, we have to save and - * unwind the varying PL_comppad's ourselves, - * especially restoring the right PL_comppad on - * backtrack - so save it on the save stack */ - SAVECOMPPAD(); CHANGE_MULTICALL_FLAGS(newcv, flags); } else { PUSH_MULTICALL_FLAGS(newcv, flags); } + /* see notes above */ + CX_CUR()->blk_oldsaveix = orig_savestack_ix; + last_pushed_cv = newcv; } else { @@ -8456,9 +8494,12 @@ NULL if (last_pushed_cv) { dSP; + /* see "Some notes about MULTICALL" above */ POP_MULTICALL; PERL_UNUSED_VAR(SP); } + else + LEAVE_SCOPE(orig_savestack_ix); assert(!result || locinput - reginfo->strbeg >= 0); return result ? locinput - reginfo->strbeg : -1; diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index e59b059..1a0b228 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -22,7 +22,7 @@ BEGIN { } -plan tests => 527; # Update this when adding/deleting tests. +plan tests => 530; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1232,6 +1232,24 @@ sub run_tests { 'padtmp swiping does not affect "$a$b" =~ /(??{})/' } + # RT #129881 + # on exit from a pattern with multiple code blocks from different + # CVs, PL_comppad wasn't being restored correctly + + sub { + # give first few pad slots known values + my ($x1, $x2, $x3, $x4, $x5) = 101..105; + # these vars are in a separate pad + my $r = qr/((?{my ($y1, $y2) = 201..202; 1;})A){2}X/; + # the first alt fails, causing a switch to this anon + # sub's pad + "AAA" =~ /$r|(?{my ($z1, $z2) = 301..302; 1;})A/; + is $x1, 101, "RT #129881: x1"; + is $x2, 102, "RT #129881: x2"; + is $x3, 103, "RT #129881: x3"; + }->(); + + } # End of sub run_tests 1; -- 2.7.4