Fix an unintended upgrade to UTF-8 in the middle of a transliteration
This commit is contained in:
parent
c33e239bcc
commit
eadda09063
@ -0,0 +1,78 @@
|
|||||||
|
From 0c311b7c345769239f38d0139ea7738feec5ca4d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Karl Williamson <khw@cpan.org>
|
||||||
|
Date: Sat, 2 Nov 2019 13:59:38 -0600
|
||||||
|
Subject: [PATCH] toke.c: Fix bug tr/// upgrading to UTF-8 in middle
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Consider tr/\x{ff}-\x{100}/AB/.
|
||||||
|
|
||||||
|
While parsing, the code keeps an offset from the beginning of the output
|
||||||
|
to the beginning of the second number in the range. This is purely for
|
||||||
|
speed so that it wouldn't have to re-find the beginning of that value,
|
||||||
|
when it already knew it.
|
||||||
|
|
||||||
|
But the example above shows the folly of this shortcut. The second
|
||||||
|
number in the range causes the output to be upgraded to UTF-8, which
|
||||||
|
makes that offset invalid in general. Change to re-find the beginning.
|
||||||
|
|
||||||
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||||
|
---
|
||||||
|
t/op/tr.t | 12 +++++++++++-
|
||||||
|
toke.c | 4 +++-
|
||||||
|
2 files changed, 14 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/t/op/tr.t b/t/op/tr.t
|
||||||
|
index 47d603d4fd..25125c5bc7 100644
|
||||||
|
--- a/t/op/tr.t
|
||||||
|
+++ b/t/op/tr.t
|
||||||
|
@@ -13,7 +13,7 @@ BEGIN {
|
||||||
|
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
-plan tests => 301;
|
||||||
|
+plan tests => 304;
|
||||||
|
|
||||||
|
# Test this first before we extend the stack with other operations.
|
||||||
|
# This caused an asan failure due to a bad write past the end of the stack.
|
||||||
|
@@ -1145,4 +1145,14 @@ for ("", nullrocow) {
|
||||||
|
'RT #133880 illegal \N{}');
|
||||||
|
}
|
||||||
|
|
||||||
|
+{
|
||||||
|
+ my $c = "\xff";
|
||||||
|
+ my $d = "\x{104}";
|
||||||
|
+ eval '$c =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/';
|
||||||
|
+ is($@, "", 'tr/\x{ff}-\x{104}/\x{100}-\x{105}/ compiled');
|
||||||
|
+ is($c, "\x{100}", 'ff -> 100');
|
||||||
|
+ eval '$d =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/';
|
||||||
|
+ is($d, "\x{105}", '104 -> 105');
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
1;
|
||||||
|
diff --git a/toke.c b/toke.c
|
||||||
|
index 2995737af2..28f305c62c 100644
|
||||||
|
--- a/toke.c
|
||||||
|
+++ b/toke.c
|
||||||
|
@@ -3044,7 +3044,7 @@ S_scan_const(pTHX_ char *start)
|
||||||
|
* 'offset_to_max' is the offset in 'sv' at which the character
|
||||||
|
* (the range's maximum end point) before 'd' begins.
|
||||||
|
*/
|
||||||
|
- char * max_ptr = SvPVX(sv) + offset_to_max;
|
||||||
|
+ char * max_ptr;
|
||||||
|
char * min_ptr;
|
||||||
|
IV range_min;
|
||||||
|
IV range_max; /* last character in range */
|
||||||
|
@@ -3056,6 +3056,8 @@ S_scan_const(pTHX_ char *start)
|
||||||
|
IV real_range_max = 0;
|
||||||
|
#endif
|
||||||
|
/* Get the code point values of the range ends. */
|
||||||
|
+ max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
|
||||||
|
+ offset_to_max = max_ptr - SvPVX_const(sv);
|
||||||
|
if (d_is_utf8) {
|
||||||
|
/* We know the utf8 is valid, because we just constructed
|
||||||
|
* it ourselves in previous loop iterations */
|
||||||
|
--
|
||||||
|
2.21.0
|
||||||
|
|
48
perl-5.31.5-toke.c-comment-changes.patch
Normal file
48
perl-5.31.5-toke.c-comment-changes.patch
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
From d7f7b0e39a10a6e3e0bd81d15473ee522a064016 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Karl Williamson <khw@cpan.org>
|
||||||
|
Date: Mon, 4 Nov 2019 21:55:53 -0700
|
||||||
|
Subject: [PATCH] toke.c: comment changes
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
These should have been included in
|
||||||
|
0c311b7c345769239f38d0139ea7738feec5ca4d
|
||||||
|
|
||||||
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||||
|
---
|
||||||
|
toke.c | 11 ++---------
|
||||||
|
1 file changed, 2 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/toke.c b/toke.c
|
||||||
|
index 3f376640ef..9c1e77f9db 100644
|
||||||
|
--- a/toke.c
|
||||||
|
+++ b/toke.c
|
||||||
|
@@ -3032,13 +3032,8 @@ S_scan_const(pTHX_ char *start)
|
||||||
|
s++; /* Skip past the hyphen */
|
||||||
|
|
||||||
|
/* d now points to where the end-range character will be
|
||||||
|
- * placed. Save it so won't have to go finding it later,
|
||||||
|
- * and drop down to get that character. (Actually we
|
||||||
|
- * instead save the offset, to handle the case where a
|
||||||
|
- * realloc in the meantime could change the actual
|
||||||
|
- * pointer). We'll finish processing the range the next
|
||||||
|
- * time through the loop */
|
||||||
|
- offset_to_max = d - SvPVX_const(sv);
|
||||||
|
+ * placed. Drop down to get that character. We'll finish
|
||||||
|
+ * processing the range the next time through the loop */
|
||||||
|
|
||||||
|
if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
|
||||||
|
has_above_latin1 = TRUE;
|
||||||
|
@@ -3055,8 +3050,6 @@ S_scan_const(pTHX_ char *start)
|
||||||
|
* are the range start and range end, in order.
|
||||||
|
* 'd' points to just beyond the range end in the 'sv' string,
|
||||||
|
* where we would next place something
|
||||||
|
- * 'offset_to_max' is the offset in 'sv' at which the character
|
||||||
|
- * (the range's maximum end point) before 'd' begins.
|
||||||
|
*/
|
||||||
|
char * max_ptr;
|
||||||
|
char * min_ptr;
|
||||||
|
--
|
||||||
|
2.21.0
|
||||||
|
|
10
perl.spec
10
perl.spec
@ -276,6 +276,11 @@ Patch66: perl-5.31.5-Be-clearer-about-taint-s-effect-on-INC.patch
|
|||||||
# in upstream after 5.31.5
|
# in upstream after 5.31.5
|
||||||
Patch67: perl-5.31.5-Tie-StdHandle-BINMODE-handle-layer-argument.patch
|
Patch67: perl-5.31.5-Tie-StdHandle-BINMODE-handle-layer-argument.patch
|
||||||
|
|
||||||
|
# Fix an unintended upgrade to UTF-8 in the middle of a transliteration,
|
||||||
|
# in upstream after 5.31.5
|
||||||
|
Patch68: perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch
|
||||||
|
Patch69: perl-5.31.5-toke.c-comment-changes.patch
|
||||||
|
|
||||||
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
||||||
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
||||||
|
|
||||||
@ -2864,6 +2869,8 @@ rm -rf .git # Perl tests examine a git repository
|
|||||||
%patch65 -p1
|
%patch65 -p1
|
||||||
%patch66 -p1
|
%patch66 -p1
|
||||||
%patch67 -p1
|
%patch67 -p1
|
||||||
|
%patch68 -p1
|
||||||
|
%patch69 -p1
|
||||||
%patch200 -p1
|
%patch200 -p1
|
||||||
%patch201 -p1
|
%patch201 -p1
|
||||||
|
|
||||||
@ -2926,6 +2933,8 @@ perl -x patchlevel.h \
|
|||||||
'Fedora Patch65: Fix taint mode documentation regarding @INC' \
|
'Fedora Patch65: Fix taint mode documentation regarding @INC' \
|
||||||
'Fedora Patch66: Fix taint mode documentation regarding @INC' \
|
'Fedora Patch66: Fix taint mode documentation regarding @INC' \
|
||||||
'Fedora Patch67: Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475)' \
|
'Fedora Patch67: Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475)' \
|
||||||
|
'Fedora Patch68: Fix an unintended upgrade to UTF-8 in the middle of a transliteration' \
|
||||||
|
'Fedora Patch69: Fix an unintended upgrade to UTF-8 in the middle of a transliteration' \
|
||||||
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
||||||
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
||||||
%{nil}
|
%{nil}
|
||||||
@ -5176,6 +5185,7 @@ popd
|
|||||||
- Fix handling undefined array members in Dumpvalue (RT#134441)
|
- Fix handling undefined array members in Dumpvalue (RT#134441)
|
||||||
- Fix taint mode documentation regarding @INC
|
- Fix taint mode documentation regarding @INC
|
||||||
- Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475)
|
- Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475)
|
||||||
|
- Fix an unintended upgrade to UTF-8 in the middle of a transliteration
|
||||||
|
|
||||||
* Mon Nov 11 2019 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.30.1-447
|
* Mon Nov 11 2019 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.30.1-447
|
||||||
- 5.30.1 bump (see <https://metacpan.org/pod/release/SHAY/perl-5.30.1/pod/perldelta.pod>
|
- 5.30.1 bump (see <https://metacpan.org/pod/release/SHAY/perl-5.30.1/pod/perldelta.pod>
|
||||||
|
Loading…
Reference in New Issue
Block a user