diff --git a/.gitignore b/.gitignore index 6ec815d..62102cb 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ /Encode-2.84.tar.gz /Encode-2.85.tar.gz /Encode-2.86.tar.gz +/Encode-2.87.tar.gz diff --git a/Encode-2.86-Encode-utf8-Add-tests-for-Malformed-and-Overlong-UTF.patch b/Encode-2.86-Encode-utf8-Add-tests-for-Malformed-and-Overlong-UTF.patch deleted file mode 100644 index 5d6b3d5..0000000 --- a/Encode-2.86-Encode-utf8-Add-tests-for-Malformed-and-Overlong-UTF.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 2aac84fb885fb8560294c1080fc6bbf9be35e731 Mon Sep 17 00:00:00 2001 -From: Pali -Date: Tue, 16 Aug 2016 18:34:37 +0200 -Subject: [PATCH] Encode::utf8: Add tests for Malformed and Overlong UTF-8 - sequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -EBCDIC tests are not included yet. - -Signed-off-by: Petr Písař ---- - t/utf8strict.t | 51 ++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 46 insertions(+), 5 deletions(-) - -diff --git a/t/utf8strict.t b/t/utf8strict.t -index 3f362f4..39293d3 100644 ---- a/t/utf8strict.t -+++ b/t/utf8strict.t -@@ -47,8 +47,8 @@ BEGIN { - qq/dd 67 41 41/ => 0, # 2.3.2 - qq/ee 42 73 73 71/ => 0, # 2.3.3 - qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG -- # "3 Malformed sequences" are checked by perl. -- # "4 Overlong sequences" are checked by perl. -+ # EBCDIC TODO: "3 Malformed sequences" -+ # EBCDIC TODO: "4 Overlong sequences" - ); - } else { - %SEQ = ( -@@ -56,8 +56,49 @@ BEGIN { - qq/ee 80 80/ => 0, # 2.3.2 - qq/f4 8f bf bd/ => 0, # 2.3.3 - qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG -- # "3 Malformed sequences" are checked by perl. -- # "4 Overlong sequences" are checked by perl. -+ qq/80/ => 1, # 3.1.1 -+ qq/bf/ => 1, # 3.1.2 -+ qq/80 bf/ => 1, # 3.1.3 -+ qq/80 bf 80/ => 1, # 3.1.4 -+ qq/80 bf 80 bf/ => 1, # 3.1.5 -+ qq/80 bf 80 bf 80/ => 1, # 3.1.6 -+ qq/80 bf 80 bf 80 bf/ => 1, # 3.1.7 -+ qq/80 bf 80 bf 80 bf 80/ => 1, # 3.1.8 -+ qq/80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf/ => 1, # 3.1.9 -+ qq/c0 20 c1 20 c2 20 c3 20 c4 20 c5 20 c6 20 c7 20 c8 20 c9 20 ca 20 cb 20 cc 20 cd 20 ce 20 cf 20 d0 20 d1 20 d2 20 d3 20 d4 20 d5 20 d6 20 d7 20 d8 20 d9 20 da 20 db 20 dc 20 dd 20 de 20 df 20/ => 1, # 3.2.1 -+ qq/e0 20 e1 20 e2 20 e3 20 e4 20 e5 20 e6 20 e7 20 e8 20 e9 20 ea 20 eb 20 ec 20 ed 20 ee 20 ef 20/ => 1, # 3.2.2 -+ qq/f0 20 f1 20 f2 20 f3 20 f4 20 f5 20 f6 20 f7 20/ => 1, # 3.2.3 -+ qq/f8 20 f9 20 fa 20 fb 20/ => 1, # 3.2.4 -+ qq/fc 20 fd 20/ => 1, # 3.2.5 -+ qq/c0/ => 1, # 3.3.1 -+ qq/e0 80/ => 1, # 3.3.2 -+ qq/f0 80 80/ => 1, # 3.3.3 -+ qq/f8 80 80 80/ => 1, # 3.3.4 -+ qq/fc 80 80 80 80/ => 1, # 3.3.5 -+ qq/df/ => 1, # 3.3.6 -+ qq/ef bf/ => 1, # 3.3.7 -+ qq/f7 bf bf/ => 1, # 3.3.8 -+ qq/fb bf bf bf/ => 1, # 3.3.9 -+ qq/fd bf bf bf bf/ => 1, # 3.3.10 -+ qq/c0 e0 80 f0 80 80 f8 80 80 80 fc 80 80 80 80 df ef bf f7 bf bf fb bf bf bf fd bf bf bf bf/ => 1, # 3.4.1 -+ qq/fe/ => 1, # 3.5.1 -+ qq/ff/ => 1, # 3.5.2 -+ qq/fe fe ff ff/ => 1, # 3.5.3 -+ qq/c0 af/ => 1, # 4.1.1 -+ qq/e0 80 af/ => 1, # 4.1.2 -+ qq/f0 80 80 af/ => 1, # 4.1.3 -+ qq/f8 80 80 80 af/ => 1, # 4.1.4 -+ qq/fc 80 80 80 80 af/ => 1, # 4.1.5 -+ qq/c1 bf/ => 1, # 4.2.1 -+ qq/e0 9f bf/ => 1, # 4.2.2 -+ qq/f0 8f bf bf/ => 1, # 4.2.3 -+ qq/f8 87 bf bf bf/ => 1, # 4.2.4 -+ qq/fc 83 bf bf bf bf/ => 1, # 4.2.5 -+ qq/c0 80/ => 1, # 4.3.1 -+ qq/e0 80 80/ => 1, # 4.3.2 -+ qq/f0 80 80 80/ => 1, # 4.3.3 -+ qq/f8 80 80 80 80/ => 1, # 4.3.4 -+ qq/fc 80 80 80 80 80/ => 1, # 4.3.5 - ); - } - $NTESTS += scalar keys %SEQ; -@@ -82,7 +123,7 @@ for my $s (sort keys %SEQ){ - eval { $d->decode($o,1) }; - $DEBUG and $@ and warn $@; - my $t = $@ ? 1 : 0; -- is($t, $SEQ{$s}, $s); -+ is($t, $SEQ{$s}, "sequence: $s"); - } - - __END__ --- -2.7.4 - diff --git a/Encode-2.86-Encode-utf8-Check-for-overflowed-and-overlong-UTF-8-.patch b/Encode-2.86-Encode-utf8-Check-for-overflowed-and-overlong-UTF-8-.patch deleted file mode 100644 index 5fbf2b4..0000000 --- a/Encode-2.86-Encode-utf8-Check-for-overflowed-and-overlong-UTF-8-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From b7fc82093961f282b986a62c582b4ca2fcc303db Mon Sep 17 00:00:00 2001 -From: Pali -Date: Tue, 16 Aug 2016 19:07:31 +0200 -Subject: [PATCH] Encode::utf8: Check for overflowed and overlong UTF-8 - sequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Petr Písař ---- - Encode.xs | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/Encode.xs b/Encode.xs -index 60de62c..1906f0c 100644 ---- a/Encode.xs -+++ b/Encode.xs -@@ -331,6 +331,13 @@ strict_utf8(pTHX_ SV* sv) - - #define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c)) - -+#ifndef UTF_ACCUMULATION_OVERFLOW_MASK -+#ifndef CHARBITS -+#define CHARBITS CHAR_BIT -+#endif -+#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT)) -+#endif -+ - /* - * Convert non strict utf8 sequence of len >= 2 to unicode codepoint - */ -@@ -339,6 +346,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) - { - UV uv; - U8 *ptr = s; -+ bool overflowed = 0; - - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); - -@@ -350,11 +358,17 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) - *rlen = s-ptr; - return 0; - } -+ if (uv & UTF_ACCUMULATION_OVERFLOW_MASK) -+ overflowed = 1; - uv = UTF8_ACCUMULATE(uv, *s); - s++; - } - - *rlen = s-ptr; -+ -+ if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) -+ return 0; -+ - return uv; - } - --- -2.7.4 - diff --git a/Encode-2.86-Encode-utf8-Fix-count-of-replacement-characters-for-.patch b/Encode-2.86-Encode-utf8-Fix-count-of-replacement-characters-for-.patch deleted file mode 100644 index e3f14a8..0000000 --- a/Encode-2.86-Encode-utf8-Fix-count-of-replacement-characters-for-.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 3cf4b7e53c84d91fa819d89d2504be2db90dee11 Mon Sep 17 00:00:00 2001 -From: Pali -Date: Fri, 19 Aug 2016 10:58:56 +0200 -Subject: [PATCH] Encode::utf8: Fix count of replacement characters for - overflowed and overlong UTF-8 sequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Petr Písař ---- - Encode.xs | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/Encode.xs b/Encode.xs -index 1906f0c..49a3846 100644 ---- a/Encode.xs -+++ b/Encode.xs -@@ -366,8 +366,10 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) - - *rlen = s-ptr; - -- if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) -+ if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) { -+ *rlen = 1; - return 0; -+ } - - return uv; - } --- -2.7.4 - diff --git a/Encode-2.86-Encode-utf8-Fix-processing-invalid-UTF-8-subsequence.patch b/Encode-2.86-Encode-utf8-Fix-processing-invalid-UTF-8-subsequence.patch deleted file mode 100644 index 975753a..0000000 --- a/Encode-2.86-Encode-utf8-Fix-processing-invalid-UTF-8-subsequence.patch +++ /dev/null @@ -1,135 +0,0 @@ -From cbdb75742d763ab4133942014747132400c6ddfb Mon Sep 17 00:00:00 2001 -From: Pali -Date: Tue, 16 Aug 2016 19:05:17 +0200 -Subject: [PATCH] Encode::utf8: Fix processing invalid UTF-8 subsequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Skip number characters which was really processed by convert_utf8_multi_seq -and not just expected number of characters. Also simplify check for strict -UTF-8 mode. - -Signed-off-by: Petr Písař ---- - Encode.xs | 46 ++++++++++++++++++++++++++-------------------- - t/fallback.t | 2 +- - 2 files changed, 27 insertions(+), 21 deletions(-) - -diff --git a/Encode.xs b/Encode.xs -index 6b4fae9..60de62c 100644 ---- a/Encode.xs -+++ b/Encode.xs -@@ -325,13 +325,20 @@ strict_utf8(pTHX_ SV* sv) - #define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE) - #endif - -+#ifndef UNICODE_IS_SUPER -+#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX) -+#endif -+ -+#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c)) -+ -+/* -+ * Convert non strict utf8 sequence of len >= 2 to unicode codepoint -+ */ - static UV --convert_utf8_multi_seq(U8* s, STRLEN len, bool strict) -+convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) - { - UV uv; -- -- if (strict && len > 4) -- return 0; -+ U8 *ptr = s; - - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); - -@@ -339,15 +346,15 @@ convert_utf8_multi_seq(U8* s, STRLEN len, bool strict) - s++; - - while (len--) { -- if (!UTF8_IS_CONTINUATION(*s)) -+ if (!UTF8_IS_CONTINUATION(*s)) { -+ *rlen = s-ptr; - return 0; -+ } - uv = UTF8_ACCUMULATE(uv, *s); - s++; - } - -- if (strict && (UNICODE_IS_SURROGATE(uv) || UNICODE_IS_NONCHAR(uv) || uv > PERL_UNICODE_MAX)) -- return 0; -- -+ *rlen = s-ptr; - return uv; - } - -@@ -384,32 +391,30 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, - continue; - } - -+ ulen = 1; - if (UTF8_IS_START(*s)) { - U8 skip = UTF8SKIP(s); - if ((s + skip) > e) { - if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) { - const U8 *p = s + 1; - for (; p < e; p++) { -- if (!UTF8_IS_CONTINUATION(*p)) -+ if (!UTF8_IS_CONTINUATION(*p)) { -+ ulen = p-s; - goto malformed_byte; -+ } - } - break; - } - -+ ulen = e-s; - goto malformed_byte; - } - -- ulen = skip; -- uv = convert_utf8_multi_seq(s, skip, strict); -- if (uv == 0) { -- if (strict) { -- uv = convert_utf8_multi_seq(s, skip, 0); -- if (uv == 0) -- goto malformed_byte; -- goto malformed; -- } -+ uv = convert_utf8_multi_seq(s, skip, &ulen); -+ if (uv == 0) - goto malformed_byte; -- } -+ else if (strict && !UNICODE_IS_STRICT(uv)) -+ goto malformed; - - - /* Whole char is good */ -@@ -422,7 +427,8 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, - /* If we get here there is something wrong with alleged UTF-8 */ - malformed_byte: - uv = (UV)*s; -- ulen = 1; -+ if (ulen == 0) -+ ulen = 1; - - malformed: - if (check & ENCODE_DIE_ON_ERR){ -diff --git a/t/fallback.t b/t/fallback.t -index 8ef8ab3..86605ef 100644 ---- a/t/fallback.t -+++ b/t/fallback.t -@@ -35,7 +35,7 @@ for my $i (0x80..0xff){ - $uo .= chr($i); - $residue .= chr($i); - $af .= '?'; -- $uf .= "\x{FFFD}"; -+ $uf .= "\x{FFFD}" if $i < 0xfd; - $ap .= sprintf("\\x{%04x}", $i); - $up .= sprintf("\\x%02X", $i); - $ah .= sprintf("&#%d;", $i); --- -2.7.4 - diff --git a/Encode-2.86-Fix-return-value-of-Encode-encode_utf8-undef.patch b/Encode-2.86-Fix-return-value-of-Encode-encode_utf8-undef.patch deleted file mode 100644 index fb48e8e..0000000 --- a/Encode-2.86-Fix-return-value-of-Encode-encode_utf8-undef.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 646aaae364fc8cd19786a66b88ec6aaf3f093024 Mon Sep 17 00:00:00 2001 -From: Pali -Date: Thu, 11 Aug 2016 23:09:26 +0200 -Subject: [PATCH] Fix return value of Encode::encode_utf8(undef) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Documentation says that '$octets = encode_utf8($string)' is equivalent to -'$octets = encode("utf8", $string)'. And if the $string is undef, then -undef is returned. However without this patch encode_utf8(undef) returned -'' (empty string) and not undef. This patch fixes it and undef is returned. - -All other utf8 Encode calls already returns undef, just encode_utf8() acted -differently. - - Encode::encode('utf8', undef) -> undef - Encode::decode('utf8', undef) -> undef - Encode::decode_utf8(undef) -> undef - -Reported bug: https://rt.cpan.org/Public/Bug/Display.html?id=116904 - -Signed-off-by: Petr Písař ---- - Encode.pm | 1 + - t/utf8ref.t | 15 ++++++++++++++- - 2 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/Encode.pm b/Encode.pm -index bda8e1b..bc600be 100644 ---- a/Encode.pm -+++ b/Encode.pm -@@ -254,6 +254,7 @@ sub from_to($$$;$) { - - sub encode_utf8($) { - my ($str) = @_; -+ return undef unless defined $str; - utf8::encode($str); - return $str; - } -diff --git a/t/utf8ref.t b/t/utf8ref.t -index 3253e08..aff098f 100644 ---- a/t/utf8ref.t -+++ b/t/utf8ref.t -@@ -6,7 +6,7 @@ use strict; - use warnings; - use Encode; - use Test::More; --plan tests => 4; -+plan tests => 12; - #plan 'no_plan'; - - # my $a = find_encoding('ASCII'); -@@ -18,3 +18,16 @@ is $u->encode($r), ''; - $r = {}; - is decode_utf8($r), ''.$r; - is $u->decode($r), ''; -+use warnings 'uninitialized'; -+ -+is encode_utf8(undef), undef; -+is decode_utf8(undef), undef; -+ -+is encode_utf8(''), ''; -+is decode_utf8(''), ''; -+ -+is Encode::encode('utf8', undef), undef; -+is Encode::decode('utf8', undef), undef; -+ -+is Encode::encode('utf8', ''), ''; -+is Encode::decode('utf8', ''), ''; --- -2.7.4 - diff --git a/perl-Encode.spec b/perl-Encode.spec index 8565ded..dfa0ffe 100644 --- a/perl-Encode.spec +++ b/perl-Encode.spec @@ -1,6 +1,6 @@ # Because encoding sub-package has independent version, version macro gets # redefined. -%global cpan_version 2.86 +%global cpan_version 2.87 Name: perl-Encode Epoch: 4 Version: %{cpan_version} @@ -8,7 +8,7 @@ Version: %{cpan_version} # perl-encoding sub-package has independent version which does not change # often and consecutive builds would clash on perl-encoding NEVRA. This is the # same case as in perl.spec. -Release: 3%{?dist} +Release: 4%{?dist} Summary: Character encodings in Perl # ucm: UCD # bin/encguess: Artistic 2.0 @@ -17,24 +17,6 @@ License: (GPL+ or Artistic) and Artistic 2.0 and UCD Group: Development/Libraries URL: http://search.cpan.org/dist/Encode/ Source0: http://www.cpan.org/authors/id/D/DA/DANKOGAI/Encode-%{cpan_version}.tar.gz -# Fix Encode::encode_utf8(undef) to return undef, CPAN RT#116904, -# in upstream after 2.86 -Patch0: Encode-2.86-Fix-return-value-of-Encode-encode_utf8-undef.patch -# Tests for refusing non-shortests UTF-8 representations, -# , in upstream after 2.86 -Patch1: Encode-2.86-Encode-utf8-Add-tests-for-Malformed-and-Overlong-UTF.patch -# 1/3 Refusing non-shortests UTF-8 representations, -# , in upstream after 2.86 -Patch2: Encode-2.86-Encode-utf8-Fix-processing-invalid-UTF-8-subsequence.patch -# 2/3 Refusing non-shortests UTF-8 representations, -# , in upstream after 2.86 -Patch3: Encode-2.86-Encode-utf8-Check-for-overflowed-and-overlong-UTF-8-.patch -# 3/3 Refusing non-shortests UTF-8 representations, -# , in upstream after 2.86 -Patch4: Encode-2.86-Encode-utf8-Fix-count-of-replacement-characters-for-.patch -# Fix panic when encoding undefined scalars, -# , in upstream after 2.86 -Patch5: Encode-2.86-Fix-panic-when-encoding-undef-scalars.patch BuildRequires: coreutils BuildRequires: findutils BuildRequires: make @@ -145,12 +127,6 @@ your own encoding to perl. No knowledge of XS is necessary. %prep %setup -q -n Encode-%{cpan_version} -%patch0 -p1 -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 %build # Additional scripts can be installed by appending MORE_SCRIPTS, UCM files by @@ -192,6 +168,9 @@ make test %{perl_vendorarch}/Encode/encode.h %changelog +* Mon Oct 31 2016 Petr Pisar - 4:2.87-4 +- 2.87 bump + * Fri Sep 30 2016 Petr Pisar - 4:2.86-3 - Fix Encode::encode_utf8(undef) to return undef (CPAN RT#116904) - Refuse non-shortests UTF-8 representations in strict mode diff --git a/sources b/sources index 2d661ff..48ffbbc 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -ebb87962b8cbaf0097721d0b7f1becfc Encode-2.86.tar.gz +cf79f404b01394620f2881524ea9e868 Encode-2.87.tar.gz