121 lines
5.2 KiB
Diff
121 lines
5.2 KiB
Diff
From 12c313ce49b36160a7ca2e9b07ad5bd92ee4a010 Mon Sep 17 00:00:00 2001
|
|
From: Karl Williamson <khw@cpan.org>
|
|
Date: Sat, 9 Sep 2023 11:59:09 -0600
|
|
Subject: [PATCH 1/2] Fix read/write past buffer end: perl-security#140
|
|
|
|
A package name may be specified in a \p{...} regular expression
|
|
construct. If unspecified, "utf8::" is assumed, which is the package
|
|
all official Unicode properties are in. By specifying a different
|
|
package, one can create a user-defined property with the same
|
|
unqualified name as a Unicode one. Such a property is defined by a sub
|
|
whose name begins with "Is" or "In", and if the sub wishes to refer to
|
|
an official Unicode property, it must explicitly specify the "utf8::".
|
|
S_parse_uniprop_string() is used to parse the interior of both \p{} and
|
|
the user-defined sub lines.
|
|
|
|
S_parse_uniprop_string() parses the input "name" parameter,
|
|
creating a modified copy, "lookup_name", malloc'ed with the same size as
|
|
"name". The modifications are essentially to create a canonicalized
|
|
version of the input, with such things as extraneous white-space
|
|
stripped off. I found it convenient to strip off the package specifier
|
|
"utf8::". To to so, the code simply pretends "lookup_name" begins just
|
|
after the "utf8::", and adjusts various other values to compensate.
|
|
However, it missed one required adjustment.
|
|
|
|
This is only a problem when the property name begins with "perl" and
|
|
is neither "perlspace" nor "perlword". All such properties are undocumented
|
|
internal properties.
|
|
|
|
What happens in this case is that the input is reparsed with slightly
|
|
different rules in effect as to what is legal versus illegal. The
|
|
problem is that "lookup_name" no longer is pointing to its initial
|
|
value, but "name" is. Thus the space allocated for filling "lookup_name"
|
|
is now shorter than "name", and as this shortened "lookup_name" is
|
|
filled by copying suitable portions of "name", the write can be to
|
|
unallocated space.
|
|
|
|
The solution is to skip the "utf8::" when reparsing "name". Then both
|
|
"lookup_name" and "name" are effectively shortened by the same amount,
|
|
and there is no going off the end.
|
|
|
|
This commit also does white-space adjustment so that things align
|
|
vertically for readability.
|
|
|
|
This can be easily backported to earlier Perl releases.
|
|
---
|
|
regcomp.c | 17 +++++++++++------
|
|
t/re/pat_advanced.t | 8 ++++++++
|
|
2 files changed, 19 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/regcomp.c b/regcomp.c
|
|
index 9c6ccc2c1b..833f8644f7 100644
|
|
--- a/regcomp.c
|
|
+++ b/regcomp.c
|
|
@@ -23697,7 +23697,7 @@ S_parse_uniprop_string(pTHX_
|
|
* compile perl to know about them) */
|
|
bool is_nv_type = FALSE;
|
|
|
|
- unsigned int i, j = 0;
|
|
+ unsigned int i = 0, i_zero = 0, j = 0;
|
|
int equals_pos = -1; /* Where the '=' is found, or negative if none */
|
|
int slash_pos = -1; /* Where the '/' is found, or negative if none */
|
|
int table_index = 0; /* The entry number for this property in the table
|
|
@@ -23831,9 +23831,13 @@ S_parse_uniprop_string(pTHX_
|
|
* all of them are considered to be for that package. For the purposes of
|
|
* parsing the rest of the property, strip it off */
|
|
if (non_pkg_begin == STRLENs("utf8::") && memBEGINPs(name, name_len, "utf8::")) {
|
|
- lookup_name += STRLENs("utf8::");
|
|
- j -= STRLENs("utf8::");
|
|
- equals_pos -= STRLENs("utf8::");
|
|
+ lookup_name += STRLENs("utf8::");
|
|
+ j -= STRLENs("utf8::");
|
|
+ equals_pos -= STRLENs("utf8::");
|
|
+ i_zero = STRLENs("utf8::"); /* When resetting 'i' to reparse
|
|
+ from the beginning, it has to be
|
|
+ set past what we're stripping
|
|
+ off */
|
|
stripped_utf8_pkg = TRUE;
|
|
}
|
|
|
|
@@ -24238,7 +24242,8 @@ S_parse_uniprop_string(pTHX_
|
|
|
|
/* We set the inputs back to 0 and the code below will reparse,
|
|
* using strict */
|
|
- i = j = 0;
|
|
+ i = i_zero;
|
|
+ j = 0;
|
|
}
|
|
}
|
|
|
|
@@ -24259,7 +24264,7 @@ S_parse_uniprop_string(pTHX_
|
|
* separates two digits */
|
|
if (cur == '_') {
|
|
if ( stricter
|
|
- && ( i == 0 || (int) i == equals_pos || i == name_len- 1
|
|
+ && ( i == i_zero || (int) i == equals_pos || i == name_len- 1
|
|
|| ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
|
|
{
|
|
lookup_name[j++] = '_';
|
|
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
|
|
index 6152c7b85c..1db317fff9 100644
|
|
--- a/t/re/pat_advanced.t
|
|
+++ b/t/re/pat_advanced.t
|
|
@@ -2576,6 +2576,14 @@ EOF
|
|
{}, "GH #17278");
|
|
}
|
|
|
|
+ { # perl-security#140, read/write past buffer end
|
|
+ fresh_perl_like('qr/\p{utf8::perl x}/',
|
|
+ qr/Illegal user-defined property name "utf8::perl x" in regex/,
|
|
+ {}, "perl-security#140");
|
|
+ fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
|
|
+ {}, "perl-security#140");
|
|
+ }
|
|
+
|
|
|
|
# !!! NOTE that tests that aren't at all likely to crash perl should go
|
|
# a ways above, above these last ones. There's a comment there that, like
|
|
--
|
|
2.34.1
|
|
|