From e0456df3dcd976a15d418d150cc551ba5ce8f006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Tue, 8 Aug 2017 14:47:49 +0200 Subject: [PATCH] Fix reporting malformed UTF-8 character --- ...31646-Assertion-fail-UTF-8-error-msg.patch | 63 +++++++++++++++++++ ...-5.27.1-t-lib-warnings-utf8-Fix-test.patch | 30 +++++++++ ...rl-131646-make-the-test-less-fragile.patch | 43 +++++++++++++ perl.spec | 14 ++++- 4 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 perl-5.27.1-PATCH-perl-131646-Assertion-fail-UTF-8-error-msg.patch create mode 100644 perl-5.27.1-t-lib-warnings-utf8-Fix-test.patch create mode 100644 perl-5.27.2-perl-131646-make-the-test-less-fragile.patch diff --git a/perl-5.27.1-PATCH-perl-131646-Assertion-fail-UTF-8-error-msg.patch b/perl-5.27.1-PATCH-perl-131646-Assertion-fail-UTF-8-error-msg.patch new file mode 100644 index 0000000..f479bde --- /dev/null +++ b/perl-5.27.1-PATCH-perl-131646-Assertion-fail-UTF-8-error-msg.patch @@ -0,0 +1,63 @@ +From 1d5030e143202c1e963e1fc91eb6f3afaa2df83e Mon Sep 17 00:00:00 2001 +From: Karl Williamson +Date: Sat, 24 Jun 2017 11:47:19 -0600 +Subject: [PATCH] PATCH: [perl #131646] Assertion fail UTF-8 error msg +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Instead of croaking with a proper message, creating the message creates +an assertion failure. + +The cause was that there were two ++ operators on a string, so one +should subtract 2 to get to the string start, but only 1 was being +subtracted. + +This is a 5.26 regression, but not terribly consequential, as the +program is about to die, but it is a trivial fix that allows the reason +the crash is happening to be properly displayed to aid debugging, so I'm +adding my vote for it for 5.26.1. 
+ +Signed-off-by: Petr Písař +--- + t/lib/warnings/utf8 | 13 +++++++++++++ + utf8.c | 2 +- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 +index a4dfb12..a26bbed 100644 +--- a/t/lib/warnings/utf8 ++++ b/t/lib/warnings/utf8 +@@ -749,3 +749,16 @@ BEGIN{ + {};$^H=eval'2**400'} + EXPECT + Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11. ++######## ++# NAME [perl #131646] ++BEGIN{ ++ if (ord('A') == 193) { ++ print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; ++ exit 0; ++ } ++} ++no warnings; ++use warnings 'utf8'; ++for(uc 0..t){0~~pack"UXp>",exp} ++EXPECT ++Malformed UTF-8 character: \xc2\x00 (unexpected non-continuation byte 0x00, immediately after start byte 0xc2; need 2 bytes, got 1) in smart match at - line 9. +diff --git a/utf8.c b/utf8.c +index 68ac640..2ee701a 100644 +--- a/utf8.c ++++ b/utf8.c +@@ -1875,7 +1875,7 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen) + /* diag_listed_as: Malformed UTF-8 character%s */ + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), + "%s %s%s", +- unexpected_non_continuation_text(u - 1, 2, 1, 2), ++ unexpected_non_continuation_text(u - 2, 2, 1, 2), + PL_op ? " in " : "", + PL_op ? OP_DESC(PL_op) : ""); + return -2; +-- +2.9.4 + diff --git a/perl-5.27.1-t-lib-warnings-utf8-Fix-test.patch b/perl-5.27.1-t-lib-warnings-utf8-Fix-test.patch new file mode 100644 index 0000000..3ed22e2 --- /dev/null +++ b/perl-5.27.1-t-lib-warnings-utf8-Fix-test.patch @@ -0,0 +1,30 @@ +From 97e57bec1f0ba4f0c3b1dc18ee146632010e3373 Mon Sep 17 00:00:00 2001 +From: Karl Williamson +Date: Sat, 15 Jul 2017 19:36:25 -0600 +Subject: [PATCH] t/lib/warnings/utf8: Fix test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There is some randomness to this test added to fix [perl #131646]. 
+Change what passes to be a pattern that matches the correct template + +Signed-off-by: Petr Písař +--- + t/lib/warnings/utf8 | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 +index 9066308..dfc58c1 100644 +--- a/t/lib/warnings/utf8 ++++ b/t/lib/warnings/utf8 +@@ -781,4 +781,5 @@ no warnings; + use warnings 'utf8'; + for(uc 0..t){0~~pack"UXp>",exp} + EXPECT +-Malformed UTF-8 character: \xc2\x00 (unexpected non-continuation byte 0x00, immediately after start byte 0xc2; need 2 bytes, got 1) in smart match at - line 9. ++OPTIONS regex ++Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in smart match at - line 9. +-- +2.9.4 + diff --git a/perl-5.27.2-perl-131646-make-the-test-less-fragile.patch b/perl-5.27.2-perl-131646-make-the-test-less-fragile.patch new file mode 100644 index 0000000..49eb6a8 --- /dev/null +++ b/perl-5.27.2-perl-131646-make-the-test-less-fragile.patch @@ -0,0 +1,43 @@ +From 9c6b56dc65cdd9256fbe04a7baf4f085db1c04dd Mon Sep 17 00:00:00 2001 +From: Tony Cook +Date: Tue, 8 Aug 2017 14:45:29 +1000 +Subject: [PATCH] (perl #131646) make the test less fragile +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The original pattern "UXp>" with the $_ that causes the failure, 5, +so we end up packing exp(5) or 148.... with U packs: + + - U (148), producing C2 94, with the UTF8 flag set + - X - back up a byte, + - p> - write the address of PL_sv_no's PV in big-endian + +The final p> will typically overwrite the 94 with a zero on 64-bit +systems, but with the smaller address space of 32-bit systems that +high-byte is more likely to be a valid continuation byte, causing +the comparison to fail. + +Instead just pack a zero byte. 
+ +Signed-off-by: Petr Písař +--- + t/lib/warnings/utf8 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 +index dfc58c1..a9a6388 100644 +--- a/t/lib/warnings/utf8 ++++ b/t/lib/warnings/utf8 +@@ -779,7 +779,7 @@ BEGIN{ + } + no warnings; + use warnings 'utf8'; +-for(uc 0..t){0~~pack"UXp>",exp} ++for(uc 0..t){0~~pack"UXc",exp} + EXPECT + OPTIONS regex + Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in smart match at - line 9. +-- +2.9.4 + diff --git a/perl.spec b/perl.spec index 56bbf1c..ab2ec2a 100644 --- a/perl.spec +++ b/perl.spec @@ -79,7 +79,7 @@ License: GPL+ or Artistic Epoch: %{perl_epoch} Version: %{perl_version} # release number must be even higher, because dual-lived modules will be broken otherwise -Release: 397%{?dist} +Release: 398%{?dist} Summary: Practical Extraction and Report Language Url: http://www.perl.org/ Source0: http://www.cpan.org/src/5.0/perl-%{perl_version}.tar.bz2 @@ -177,6 +177,11 @@ Patch39: perl-5.26.0-don-t-call-Perl_fbm_instr-with-negative-length.patch Patch40: perl-5.27.0-Resolve-Perl-131522-Spurious-Assuming-NOT-a-POSIX-cl.patch Patch41: perl-5.27.0-add-test-for-perl-131522-and-fix-test-for-related-pe.patch +# Fix reporting malformed UTF-8 character, RT#131646, in upstream after 5.27.1 +Patch42: perl-5.27.1-PATCH-perl-131646-Assertion-fail-UTF-8-error-msg.patch +Patch43: perl-5.27.1-t-lib-warnings-utf8-Fix-test.patch +Patch44: perl-5.27.2-perl-131646-make-the-test-less-fragile.patch + # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048 Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch @@ -2746,6 +2751,9 @@ Perl extension for Version Objects %patch39 -p1 %patch40 -p1 %patch41 -p1 +%patch42 -p1 +%patch43 -p1 +%patch44 -p1 %patch200 -p1 %patch201 -p1 @@ -2775,6 +2783,7 @@ perl -x patchlevel.h \ 'Fedora 
Patch38: Fix handling backslashes in PATH environment variable when executing "perl -S" (RT#129183)' \ 'Fedora Patch39: Fix a conditional jump on uninitilized memory in re_intuit_start() (RT#131575)' \ 'Fedora Patch40: Fix spurious "Assuming NOT a POSIX class" warning (RT#131522)' \ + 'Fedora Patch42: Fix reporting malformed UTF-8 character (RT#131646)' \ 'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \ 'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \ %{nil} @@ -5058,6 +5067,9 @@ popd # Old changelog entries are preserved in CVS. %changelog +* Tue Aug 08 2017 Petr Pisar - 4:5.26.0-398 +- Fix reporting malformed UTF-8 character (RT#131646) + * Sat Jul 29 2017 Igor Gnatenko - 4:5.26.0-397 - Enable separate debuginfo back