From 62130748594f803da49b6abf3e352e51148a3886 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden
Date: Tue, 4 Oct 2016 14:40:11 +0100
Subject: [PATCH] anchored/floating substrings must be utf8 if target is
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ported to 5.24.0:

commit 2814f4b3549f665a6f9203ac9e890ae1e415e0dc
Author: Hugo van der Sanden
Date: Tue Oct 4 14:40:11 2016 +0100

    [perl #129350] anchored/floating substrings must be utf8 if target is

    If the target is utf8 and either the anchored or floating substrings
    are not, we need to create utf8 copies to check against. The state
    of the two substrings may not be the same, but we were only testing
    whichever we planned to check first.

Signed-off-by: Petr Písař
---
 regexec.c     | 3 ++-
 t/re/re_tests | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/regexec.c b/regexec.c
index cdaa95c..38ff44a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -703,7 +703,8 @@ Perl_re_intuit_start(pTHX_
     reginfo->poscache_maxiter = 0;
 
     if (utf8_target) {
-        if (!prog->check_utf8 && prog->check_substr)
+        if ((!prog->anchored_utf8 && prog->anchored_substr)
+                || (!prog->float_utf8 && prog->float_substr))
             to_utf8_substr(prog);
         check = prog->check_utf8;
     } else {
diff --git a/t/re/re_tests b/t/re/re_tests
index 7e8522d..2f4d00c 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1968,6 +1968,7 @@ ab(?#Comment){2}c abbc y $& abbc
 (?:.||)(?|)000000000@ 000000000@ y $& 000000000@ # [perl #126405]
 aa$|a(?R)a|a aaa y $& aaa # [perl 128420] recursive matches
 (?:\1|a)([bcd])\1(?:(?R)|e)\1 abbaccaddedcb y $& abbaccaddedcb # [perl 128420] recursive match with backreferences
+\b\z0*\x{100} .\x{100} n - - # [perl #129350] crashed in intuit_start
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
2.7.4