Fix fc() in Turkish locale

This commit is contained in:
Petr Písař 2021-02-09 18:50:32 +01:00
parent 27a18537fa
commit 9efe548119
2 changed files with 146 additions and 0 deletions

View File

@ -0,0 +1,140 @@
From 4cfbe5474a5c5f852a6dbf0138dc796c2800be93 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 30 Dec 2020 05:55:08 -0700
Subject: [PATCH] Fix buggy fc() in Turkish locale
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When Turkish handling was added, fc() wasn't properly updated
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pp.c | 12 +++++++++---
t/op/lc.t | 23 ++++++++++++++++-------
2 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/pp.c b/pp.c
index 5e1706346d..23cc6c8adb 100644
--- a/pp.c
+++ b/pp.c
@@ -4813,7 +4813,7 @@ PP(pp_fc)
do {
extra++;
- s_peek = (U8 *) memchr(s_peek + 1, 'i',
+ s_peek = (U8 *) memchr(s_peek + 1, 'I',
send - (s_peek + 1));
} while (s_peek != NULL);
}
@@ -4828,8 +4828,14 @@ PP(pp_fc)
+ 1 /* Trailing NUL */ );
d = (U8*)SvPVX(dest) + len;
- *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
- *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
+ if (*s == 'I') {
+ *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+ *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+ }
+ else {
+ *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
+ *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
+ }
s++;
for (; s < send; s++) {
diff --git a/t/op/lc.t b/t/op/lc.t
index fce77f3d34..812c41d6b6 100644
--- a/t/op/lc.t
+++ b/t/op/lc.t
@@ -17,7 +17,7 @@ BEGIN {
use feature qw( fc );
-plan tests => 139 + 2 * (4 * 256) + 15;
+plan tests => 139 + 2 * (5 * 256) + 17;
is(lc(undef), "", "lc(undef) is ''");
is(lcfirst(undef), "", "lcfirst(undef) is ''");
@@ -352,13 +352,14 @@ foreach my $turkic (0 .. 1) {
my $locale = ($turkic) ? $turkic_locale : $non_turkic_locale;
SKIP: {
- skip "Can't find a $type UTF-8 locale", 4*256 unless defined $locale;
+ skip "Can't find a $type UTF-8 locale", 5*256 unless defined $locale;
use feature qw( unicode_strings );
no locale;
my @unicode_lc;
+ my @unicode_fc;
my @unicode_uc;
my @unicode_lcfirst;
my @unicode_ucfirst;
@@ -366,6 +367,7 @@ foreach my $turkic (0 .. 1) {
# Get all the values outside of 'locale'
for my $i (0 .. 255) {
push @unicode_lc, lc(chr $i);
+ push @unicode_fc, fc(chr $i);
push @unicode_uc, uc(chr $i);
push @unicode_lcfirst, lcfirst(chr $i);
push @unicode_ucfirst, ucfirst(chr $i);
@@ -373,6 +375,7 @@ foreach my $turkic (0 .. 1) {
if ($turkic) {
$unicode_lc[ord 'I'] = chr 0x131;
+ $unicode_fc[ord 'I'] = chr 0x131;
$unicode_lcfirst[ord 'I'] = chr 0x131;
$unicode_uc[ord 'i'] = chr 0x130;
$unicode_ucfirst[ord 'i'] = chr 0x130;
@@ -384,6 +387,7 @@ foreach my $turkic (0 .. 1) {
for my $i (0 .. 255) {
is(lc(chr $i), $unicode_lc[$i], "In a $type UTF-8 locale, lc(chr $i) is the same as official Unicode");
is(uc(chr $i), $unicode_uc[$i], "In a $type UTF-8 locale, uc(chr $i) is the same as official Unicode");
+ is(fc(chr $i), $unicode_fc[$i], "In a $type UTF-8 locale, fc(chr $i) is the same as official Unicode");
is(lcfirst(chr $i), $unicode_lcfirst[$i], "In a $type UTF-8 locale, lcfirst(chr $i) is the same as official Unicode");
is(ucfirst(chr $i), $unicode_ucfirst[$i], "In a $type UTF-8 locale, ucfirst(chr $i) is the same as official Unicode");
}
@@ -391,27 +395,32 @@ foreach my $turkic (0 .. 1) {
}
SKIP: {
- skip "Can't find a turkic UTF-8 locale", 15 unless defined $turkic_locale;
+ skip "Can't find a turkic UTF-8 locale", 17 unless defined $turkic_locale;
# These are designed to stress the calculation of space needed for the
# strings. $filler contains a variety of characters that have special
# handling in the casing functions, and some regular chars as well.
+ # (0x49 = 'I')
my $filler_length = 10000;
- my $filler = uni_to_native("\x{df}\x{b5}\x{e0}\x{c1}\x{b6}\x{ff}") x $filler_length;
+ my $filler = uni_to_native("\x{df}\x{49}\x{69}\x{b5}\x{e0}\x{c1}\x{b6}\x{ff}") x $filler_length;
# These are the correct answers to what should happen when the given
# casing function is called on $filler;
- my $filler_lc = uni_to_native("\x{df}\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}") x $filler_length;
- my $filler_fc = ("ss" . uni_to_native("\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}")) x $filler_length;
- my $filler_uc = ("SS" . uni_to_native("\x{39c}\x{c0}\x{c1}\x{b6}\x{178}")) x $filler_length;
+ my $filler_lc = uni_to_native("\x{df}\x{131}\x{69}\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}") x $filler_length;
+ my $filler_fc = ("ss" . uni_to_native("\x{131}\x{69}\x{3bc}\x{e0}\x{e1}\x{b6}\x{ff}")) x $filler_length;
+ my $filler_uc = ("SS" . uni_to_native("\x{49}\x{130}\x{39c}\x{c0}\x{c1}\x{b6}\x{178}")) x $filler_length;
use locale;
setlocale(&POSIX::LC_CTYPE, $turkic_locale);
is (lc "IIIIIII$filler", "\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
"lc non-UTF-8, in Turkic locale, beginning with a bunch of I's");
+ is (fc "IIIIIII$filler", "\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_fc",
+ "fc non-UTF-8, in Turkic locale, beginning with a bunch of I's");
is (lc "${filler}IIIIIII$filler", "$filler_lc\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
"lc non-UTF-8, in Turkic locale, a bunch of I's, but not at the beginning");
+ is (fc "${filler}IIIIIII$filler", "$filler_fc\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_fc",
+ "fc non-UTF-8, in Turkic locale, a bunch of I's, but not at the beginning");
is (lc "${filler}I\x{307}$filler", "${filler_lc}i$filler_lc",
"lc in Turkic locale with DOT ABOVE immediately following I");
is (lc "${filler}I\x{307}IIIIII$filler", "${filler_lc}i\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc",
--
2.26.2

View File

@ -248,6 +248,9 @@ Patch48: perl-5.32.1-DynaLoader-use-PerlEnv_getenv.patch
# Use duplocale() if available, in upstream after 5.33.4 # Use duplocale() if available, in upstream after 5.33.4
Patch49: perl-5.33.4-locale.c-Fix-typo-in-ifdef.patch Patch49: perl-5.33.4-locale.c-Fix-typo-in-ifdef.patch
# Fix fc() in Turkish locale, in upstream after 5.33.5
Patch50: perl-5.33.5-Fix-buggy-fc-in-Turkish-locale.patch
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048 # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
@ -4298,6 +4301,7 @@ you're not running VMS, this module does nothing.
%patch47 -p1 %patch47 -p1
%patch48 -p1 %patch48 -p1
%patch49 -p1 %patch49 -p1
%patch50 -p1
%patch200 -p1 %patch200 -p1
%patch201 -p1 %patch201 -p1
@ -4346,6 +4350,7 @@ perl -x patchlevel.h \
'Fedora Patch47: Fix a crash in optimizing split() (GH#18232)' \ 'Fedora Patch47: Fix a crash in optimizing split() (GH#18232)' \
'Fedora Patch48: Make accessing environment by DynaLoader thread-safe' \ 'Fedora Patch48: Make accessing environment by DynaLoader thread-safe' \
'Fedora Patch49: Use duplocale() if available' \ 'Fedora Patch49: Use duplocale() if available' \
'Fedora Patch50: Fix fc() in Turkish locale' \
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \ 'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \ 'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
%{nil} %{nil}
@ -7076,6 +7081,7 @@ popd
* Tue Feb 09 2021 Petr Pisar <ppisar@redhat.com> - 4:5.32.1-471 * Tue Feb 09 2021 Petr Pisar <ppisar@redhat.com> - 4:5.32.1-471
- Make accessing environment by DynaLoader thread-safe - Make accessing environment by DynaLoader thread-safe
- Use duplocale() if available - Use duplocale() if available
- Fix fc() in Turkish locale
* Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4:5.32.1-470 * Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4:5.32.1-470
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild - Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild