From f34cc5af94622240abbf730ac82c4f91cc4ffb83 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden
Date: Tue, 4 Oct 2016 14:40:11 +0100
Subject: [PATCH] anchored/floating substrings must be utf8 if target is
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ported to 5.24.4:

commit 2814f4b3549f665a6f9203ac9e890ae1e415e0dc
Author: Hugo van der Sanden
Date:   Tue Oct 4 14:40:11 2016 +0100

    [perl #129350] anchored/floating substrings must be utf8 if target is

    If the target is utf8 and either the anchored or floating substrings
    are not, we need to create utf8 copies to check against. The state
    of the two substrings may not be the same, but we were only testing
    whichever we planned to check first.

Signed-off-by: Petr Písař
---
 regexec.c     | 3 ++-
 t/re/re_tests | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/regexec.c b/regexec.c
index ff8e89c..6904546 100644
--- a/regexec.c
+++ b/regexec.c
@@ -703,7 +703,8 @@ Perl_re_intuit_start(pTHX_
     reginfo->poscache_maxiter = 0;
 
     if (utf8_target) {
-	if (!prog->check_utf8 && prog->check_substr)
+	if ((!prog->anchored_utf8 && prog->anchored_substr)
+                || (!prog->float_utf8 && prog->float_substr))
 	    to_utf8_substr(prog);
 	check = prog->check_utf8;
     } else {
diff --git a/t/re/re_tests b/t/re/re_tests
index ab7ddbb..8b0feaa 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1969,6 +1969,7 @@ ab(?#Comment){2}c	abbc	y	$&	abbc
 aa$|a(?R)a|a	aaa	y	$&	aaa	# [perl 128420] recursive matches
 (?:\1|a)([bcd])\1(?:(?R)|e)\1	abbaccaddedcb	y	$&	abbaccaddedcb	# [perl 128420] recursive match with backreferences
 (?il)\x{100}|\x{100}|\x{FF}	\xFF	y	$&	\xFF
+\b\z0*\x{100}	.\x{100}	n	-	-	# [perl #129350] crashed in intuit_start
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
2.14.3