perl/perl-5.18.0-Fix-regmatch-pointer-32-bit-wraparound-regression.patch
2013-07-08 10:58:14 +02:00

110 lines
4.0 KiB
Diff

From 4149c7198d9b78d861df289cce40dd865cab57e7 Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Mon, 3 Jun 2013 22:28:37 +1000
Subject: [PATCH] Fix regmatch pointer 32-bit wraparound regression
Cherry-picked from:
commit 285a3ca139d04d2ee1894c9a9110294ee8bb0309
Merge: aad0429 dfb8f19
Author: Tony Cook <tony@develop-help.com>
AuthorDate: Mon Jun 3 22:28:37 2013 +1000
Commit: Tony Cook <tony@develop-help.com>
CommitDate: Mon Jun 3 22:28:37 2013 +1000
[perl #118175] avoid making pointers outside of objects
In a couple of cases, strings allocated above the 2GB line on 32-bit
CPUs could cause regexps to act strangely - either failing to match or
crashing perl.
The final patch in the set prevents the creation of pointers that the C
standard describes as undefined behaviour, although creating them is
typically safe (as long as the pointer isn't dereferenced).
This regression was introduced into 5.18.0 by commit
4063ade8503ac8877a02fc4eae8ebbe242b9110b.
---
regexec.c | 8 ++++----
t/re/pat_rt_report.t | 17 ++++++++++++++++-
2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/regexec.c b/regexec.c
index bc38839..b865b46 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6662,7 +6662,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
scan = *startposp;
if (max == REG_INFTY)
max = I32_MAX;
- else if (! utf8_target && scan + max < loceol)
+ else if (! utf8_target && loceol - scan > max)
loceol = scan + max;
/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
@@ -6711,7 +6711,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
scan = loceol;
break;
case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
- if (utf8_target && scan + max < loceol) {
+ if (utf8_target && loceol - scan > max) {
/* <loceol> hadn't been adjusted in the UTF-8 case */
scan += max;
@@ -6730,7 +6730,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
* true iff it doesn't matter if the argument is in UTF-8 or not */
if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) {
- if (utf8_target && scan + max < loceol) {
+ if (utf8_target && loceol - scan > max) {
/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
* since here, to match at all, 1 char == 1 byte */
loceol = scan + max;
@@ -6910,7 +6910,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
/* FALLTHROUGH */
case POSIXA:
- if (utf8_target && scan + max < loceol) {
+ if (utf8_target && loceol - scan > max) {
/* We didn't adjust <loceol> at the beginning of this routine
* because is UTF-8, but it is actually ok to do so, since here, to
diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t
index 2244fdf..9a9b5f5 100644
--- a/t/re/pat_rt_report.t
+++ b/t/re/pat_rt_report.t
@@ -22,7 +22,7 @@ BEGIN {
}
-plan tests => 2530; # Update this when adding/deleting tests.
+plan tests => 2532; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1158,6 +1158,21 @@ EOP
'$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"',
"c\n", {}, '$\' first mentioned after match');
}
+
+ {
+ # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t
+ # this tests some related failures
+ #
+ # The tests in the block *only* fail when run on 32-bit systems
+ # with a malloc that allocates above the 2GB line. On the system
+ # in the report above that only happened in a thread.
+ my $s = "\x{1ff}" . "f" x 32;
+ ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
+
+ # this one segfaulted under the conditions above
+ # of course, CANY is evil, maybe it should crash
+ ok($s =~ /.\C+/, "CANY pointer wrap");
+ }
} # End of sub run_tests
1;
--
1.8.1.4