From eadda0906396be22eefd4a5cd3ca0bd35af39fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Tue, 12 Nov 2019 16:57:00 +0100 Subject: [PATCH] Fix an unintended upgrade to UTF-8 in the middle of a transliteration --- ...-bug-tr-upgrading-to-UTF-8-in-middle.patch | 78 +++++++++++++++++++ perl-5.31.5-toke.c-comment-changes.patch | 48 ++++++++++++ perl.spec | 10 +++ 3 files changed, 136 insertions(+) create mode 100644 perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch create mode 100644 perl-5.31.5-toke.c-comment-changes.patch diff --git a/perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch b/perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch new file mode 100644 index 0000000..7a222bf --- /dev/null +++ b/perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch @@ -0,0 +1,78 @@ +From 0c311b7c345769239f38d0139ea7738feec5ca4d Mon Sep 17 00:00:00 2001 +From: Karl Williamson +Date: Sat, 2 Nov 2019 13:59:38 -0600 +Subject: [PATCH] toke.c: Fix bug tr/// upgrading to UTF-8 in middle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Consider tr/\x{ff}-\x{100}/AB/. + +While parsing, the code keeps an offset from the beginning of the output +to the beginning of the second number in the range. This is purely for +speed so that it wouldn't have to re-find the beginning of that value, +when it already knew it. + +But the example above shows the folly of this shortcut. The second +number in the range causes the output to be upgraded to UTF-8, which +makes that offset invalid in general. Change to re-find the beginning. + +Signed-off-by: Petr Písař +--- + t/op/tr.t | 12 +++++++++++- + toke.c | 4 +++- + 2 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/t/op/tr.t b/t/op/tr.t +index 47d603d4fd..25125c5bc7 100644 +--- a/t/op/tr.t ++++ b/t/op/tr.t +@@ -13,7 +13,7 @@ BEGIN { + + use utf8; + +-plan tests => 301; ++plan tests => 304; + + # Test this first before we extend the stack with other operations. + # This caused an asan failure due to a bad write past the end of the stack. +@@ -1145,4 +1145,14 @@ for ("", nullrocow) { + 'RT #133880 illegal \N{}'); + } + ++{ ++ my $c = "\xff"; ++ my $d = "\x{104}"; ++ eval '$c =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/'; ++ is($@, "", 'tr/\x{ff}-\x{104}/\x{100}-\x{105}/ compiled'); ++ is($c, "\x{100}", 'ff -> 100'); ++ eval '$d =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/'; ++ is($d, "\x{105}", '104 -> 105'); ++} ++ + 1; +diff --git a/toke.c b/toke.c +index 2995737af2..28f305c62c 100644 +--- a/toke.c ++++ b/toke.c +@@ -3044,7 +3044,7 @@ S_scan_const(pTHX_ char *start) + * 'offset_to_max' is the offset in 'sv' at which the character + * (the range's maximum end point) before 'd' begins. + */ +- char * max_ptr = SvPVX(sv) + offset_to_max; ++ char * max_ptr; + char * min_ptr; + IV range_min; + IV range_max; /* last character in range */ +@@ -3056,6 +3056,8 @@ S_scan_const(pTHX_ char *start) + IV real_range_max = 0; + #endif + /* Get the code point values of the range ends. */ ++ max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1; ++ offset_to_max = max_ptr - SvPVX_const(sv); + if (d_is_utf8) { + /* We know the utf8 is valid, because we just constructed + * it ourselves in previous loop iterations */ +-- +2.21.0 + diff --git a/perl-5.31.5-toke.c-comment-changes.patch b/perl-5.31.5-toke.c-comment-changes.patch new file mode 100644 index 0000000..439a97d --- /dev/null +++ b/perl-5.31.5-toke.c-comment-changes.patch @@ -0,0 +1,48 @@ +From d7f7b0e39a10a6e3e0bd81d15473ee522a064016 Mon Sep 17 00:00:00 2001 +From: Karl Williamson +Date: Mon, 4 Nov 2019 21:55:53 -0700 +Subject: [PATCH] toke.c: comment changes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +These should have been included in +0c311b7c345769239f38d0139ea7738feec5ca4d + +Signed-off-by: Petr Písař +--- + toke.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +diff --git a/toke.c b/toke.c +index 3f376640ef..9c1e77f9db 100644 +--- a/toke.c ++++ b/toke.c +@@ -3032,13 +3032,8 @@ S_scan_const(pTHX_ char *start) + s++; /* Skip past the hyphen */ + + /* d now points to where the end-range character will be +- * placed. Save it so won't have to go finding it later, +- * and drop down to get that character. (Actually we +- * instead save the offset, to handle the case where a +- * realloc in the meantime could change the actual +- * pointer). We'll finish processing the range the next +- * time through the loop */ +- offset_to_max = d - SvPVX_const(sv); ++ * placed. Drop down to get that character. We'll finish ++ * processing the range the next time through the loop */ + + if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { + has_above_latin1 = TRUE; +@@ -3055,8 +3050,6 @@ S_scan_const(pTHX_ char *start) + * are the range start and range end, in order. + * 'd' points to just beyond the range end in the 'sv' string, + * where we would next place something +- * 'offset_to_max' is the offset in 'sv' at which the character +- * (the range's maximum end point) before 'd' begins. + */ + char * max_ptr; + char * min_ptr; +-- +2.21.0 + diff --git a/perl.spec b/perl.spec index 7da6a0a..0c45904 100644 --- a/perl.spec +++ b/perl.spec @@ -276,6 +276,11 @@ Patch66: perl-5.31.5-Be-clearer-about-taint-s-effect-on-INC.patch # in upstream after 5.31.5 Patch67: perl-5.31.5-Tie-StdHandle-BINMODE-handle-layer-argument.patch +# Fix an unintended upgrade to UTF-8 in the middle of a transliteration, +# in upstream after 5.31.5 +Patch68: perl-5.31.5-toke.c-Fix-bug-tr-upgrading-to-UTF-8-in-middle.patch +Patch69: perl-5.31.5-toke.c-comment-changes.patch + # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048 Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch @@ -2864,6 +2869,8 @@ rm -rf .git # Perl tests examine a git repository %patch65 -p1 %patch66 -p1 %patch67 -p1 +%patch68 -p1 +%patch69 -p1 %patch200 -p1 %patch201 -p1 @@ -2926,6 +2933,8 @@ perl -x patchlevel.h \ 'Fedora Patch65: Fix taint mode documentation regarding @INC' \ 'Fedora Patch66: Fix taint mode documentation regarding @INC' \ 'Fedora Patch67: Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475)' \ + 'Fedora Patch68: Fix an unintended upgrade to UTF-8 in the middle of a transliteration' \ + 'Fedora Patch69: Fix an unintended upgrade to UTF-8 in the middle of a transliteration' \ 'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \ 'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \ %{nil} @@ -5176,6 +5185,7 @@ popd - Fix handling undefined array members in Dumpvalue (RT#134441) - Fix taint mode documentation regarding @INC - Fix handling a layer argument in Tie::StdHandle::BINMODE() (RT#132475) +- Fix an unintended upgrade to UTF-8 in the middle of a transliteration * Mon Nov 11 2019 Jitka Plesnikova - 4:5.30.1-447 - 5.30.1 bump (see