- fix CGI::escape for all strings (#472571)

- perl-CGI-t-util-58.patch: Do not distort lib/CGI/t/util-58.t http://rt.perl.org/rt3/Ticket/Display.html?id=64502
2009-04-07 09:57:20 +00:00 · 2009-04-07 09:57:20 +00:00 · 583621ac98
commit 583621ac98
parent c9cccc0be4
3 changed files with 137 additions and 2 deletions
--- a/perl-CGI-escape.patch
+++ b/perl-CGI-escape.patch
@ -0,0 +1,94 @@
 2009-04-06  Stepan Kasal  <skasal@redhat.com>
 	* t/util-58.t: Add tests reflecting common usage.
 	* CGI/Util.pm (escape): State what conversions are needed, in
 	accordance with the common usage mentioned above; and code it.
 diff -ur perl-5.10.0/lib/CGI/Util.pm perl-5.10.0/lib/CGI/Util.pm
 --- perl-5.10.0/lib/CGI/Util.pm	2008-09-08 15:58:52.000000000 +0200
 +++ perl-5.10.0/lib/CGI/Util.pm	2009-04-04 16:30:29.000000000 +0200
@@ -210,7 +210,6 @@
   my $todecode = shift;
   return undef unless defined($todecode);
   $todecode =~ tr/+/ /;       # pluses become spaces
 -    $EBCDIC = "\t" ne "\011";
     if ($EBCDIC) {
       $todecode =~ s/%([0-9a-fA-F]{2})/chr $A2E[hex($1)]/ge;
     } else {
@@ -232,16 +231,24 @@
 }
 # URL-encode data
 +#
 +# We cannot use the %u escapes, they were rejected by W3C, so the official
 +# way is %XX-escaped utf-8 encoding.
 +# Naturally, Unicode strings have to be converted to their utf-8 byte
 +# representation.  (No action is required on 5.6.)
 +# Byte strings were traditionally used directly as a sequence of octets.
 +# This worked if they actually represented binary data (i.e. in CGI::Compress).
 +# This also worked if these byte strings were actually utf-8 encoded; e.g.,
 +# when the source file used utf-8 without the appropriate "use utf8;".
 +# This fails if the byte string is actually a Latin 1 encoded string, but it
 +# was always so and cannot be fixed without breaking the binary data case.
 +# -- Stepan Kasal <skasal@redhat.com>
 +#
 sub escape {
   shift() if @_ > 1 and ( ref($_[0]) || (defined $_[1] && $_[0] eq $CGI::DefaultClass));
   my $toencode = shift;
   return undef unless defined($toencode);
 -  $toencode = eval { pack("C*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode));
 -
 -  # force bytes while preserving backward compatibility -- dankogai
 -  # but commented out because it was breaking CGI::Compress -- lstein
 -  # $toencode = eval { pack("U*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode));
 -
 +  utf8::encode($toencode) if ($] > 5.007 && utf8::is_utf8($toencode));
     if ($EBCDIC) {
       $toencode=~s/([^a-zA-Z0-9_.~-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg;
     } else {
 diff -ur perl-5.10.0/lib/CGI/t/util-58.t perl-5.10.0/lib/CGI/t/util-58.t
 --- perl-5.10.0/lib/CGI/t/util-58.t	2003-04-14 20:32:22.000000000 +0200
 +++ perl-5.10.0/lib/CGI/t/util-58.t	2009-04-06 16:49:42.000000000 +0200
@@ -1,16 +1,29 @@
 +# test CGI::Util::escape
 +use Test::More tests => 4;
 +use_ok("CGI::Util");
 +
 +# Byte strings should be escaped byte by byte:
 +# 1) not a valid utf-8 sequence:
 +my $uri = "pe\x{f8}\x{ed}\x{e8}ko.ogg";
 +is(CGI::Util::escape($uri), "pe%F8%ED%E8ko.ogg", "Escape a Latin-2 string");
 +
 +# 2) is a valid utf-8 sequence, but not a UTF-8-flagged string
 +#    This happens often: people write utf-8 strings to source, but forget
 +#    to tell perl about it by "use utf8;"--this is obviously wrong, but we
 +#    have to handle it gracefully, for compatibility with CGI.pm under
 +#    perl-5.8.x
 #
 -# This tests CGI::Util::escape() when fed with UTF-8-flagged string
 -# -- dankogai
 -BEGIN {
 -    if ($] < 5.008) {
 -       print "1..0 # \$] == $] < 5.008\n";
 -       exit(0);
 -    }
 -}
 +$uri = "pe\x{c5}\x{99}\x{c3}\x{ad}\x{c4}\x{8d}ko.ogg";
 +is(CGI::Util::escape($uri), "pe%C5%99%C3%AD%C4%8Dko.ogg",
 +	"Escape an utf-8 byte string");
 -use Test::More tests => 2;
 -use_ok("CGI::Util");
 -my $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji
 -is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt",
 -   "# Escape string with UTF-8 flag");
 +SKIP:
 +{
 +	# This tests CGI::Util::escape() when fed with UTF-8-flagged string
 +	# -- dankogai
 +	skip("Unicode strings not available in $]", 1) if ($] < 5.008);
 +	$uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji
 +	is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt",
 +   		"Escape string with UTF-8 flag");
 +}
 __END__
--- a/perl-CGI-t-util-58.patch
+++ b/perl-CGI-t-util-58.patch
@ -0,0 +1,22 @@
 2009-04-06  Stepan Kasal  <skasal@redhat.com>
 	* lib/CGI/t/util-58.t: return to the upstream version, do not
 	hide bugs.
 diff -ur perl-5.10.0.orig/lib/CGI/t/util-58.t perl-5.10.0/lib/CGI/t/util-58.t
 --- perl-5.10.0.orig/lib/CGI/t/util-58.t	2007-12-18 11:47:07.000000000 +0100
 +++ perl-5.10.0/lib/CGI/t/util-58.t	2009-04-06 18:28:07.000000000 +0200
@@ -11,11 +11,6 @@
 use Test::More tests => 2;
 use_ok("CGI::Util");
 my $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji
 -if (ord('A') == 193) { # EBCDIC.
 -    is(CGI::Util::escape($uri), "%FC%C3%A0%EE%F9%E5%E7%F8%20%FC%C3%C7%CA.txt",
 -       "# Escape string with UTF-8 (UTF-EBCDIC) flag");
 -} else {
 -    is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt",
 -       "# Escape string with UTF-8 flag");
 -}
 +is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt",
 +   "# Escape string with UTF-8 flag");
 __END__
--- a/perl.spec
+++ b/perl.spec
@ -7,7 +7,7 @@
 Name:           perl
 Version:        %{perl_version}
-Release:        65%{?dist}
+Release:        66%{?dist}
 Epoch:          %{perl_epoch}
 Summary:        Practical Extraction and Report Language
 Group:          Development/Languages
@ -97,8 +97,13 @@ Patch35:	perl-5.10.0-reorderINC.patch
 # Fix from Archive::Extract maintainer to only look at stdout
 # We need this because we're using tar >= 1.21
 # included upstream in 0.31_03
 Patch36:	perl-5.10.0-Archive-Extract-onlystdout.patch
 # Do not distort lib/CGI/t/util-58.t
 # http://rt.perl.org/rt3/Ticket/Display.html?id=64502
 Patch37:	perl-CGI-t-util-58.patch
 ### Debian Patches ###
 # Fix issue with (nested) definition lists in lib/Pod/Html.pm
@ -160,7 +165,7 @@ Patch52:	31_fix_attributes_unknown_error
 Patch53:	32_fix_fork_rand
 # Fix memory leak with qr//.
-# Adapted from upstream changhe 34506.
+# Adapted from upstream change 34506.
 Patch54:	34_fix_qr-memory-leak-2
 # CVE-2005-0448 revisited: File::Path::rmtree no longer allows creating of setuid files.
@ -224,6 +229,11 @@ Patch118:	perl-update-autodie.patch
 # patches File-Fetch and CPAN
 Patch201:	perl-5.10.0-links.patch
 # Fix CGI::escape to work with all strings, started as #472571,
 # brought upstream as http://rt.cpan.org/Public/Bug/Display.html?id=34528,
 # accepted there for CGI.pm-3.43
 Patch202:	perl-CGI-escape.patch
 BuildRoot:      %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 BuildRequires:  tcsh, dos2unix, man, groff
 BuildRequires:  gdbm-devel, db4-devel, zlib-devel
@ -952,6 +962,7 @@ upstream tarball from perl.org.
 %patch34 -p1
 %patch35 -p1
 %patch36 -p1
 %patch37 -p1
 ### Debian patches ###
 %patch40 -p1
@ -993,6 +1004,7 @@ upstream tarball from perl.org.
 %patch117 -p1
 %patch118 -p1
 %patch201 -p1
 %patch202 -p1
 #
 # Candidates for doc recoding (need case by case review):
@ -1218,6 +1230,7 @@ perl -x patchlevel.h \
 	'34507 Fix memory leak in single-char character class optimization' \
 	'Fedora Patch35: Reorder @INC, based on b9ba2fadb18b54e35e5de54f945111a56cbcb249' \
 	'Fedora Patch36: Fix from Archive::Extract maintainer to only look at stdout from tar' \
 	'Fedora Patch37: Do not distort lib/CGI/t/util-58.t' \
 	'32727 Fix issue with (nested) definition lists in lib/Pod/Html.pm' \
 	'33287 Fix NULLOK items' \
 	'33554 Fix a typo in the predefined common protocols to make "udp" resolve without netbase' \
@ -1256,6 +1269,7 @@ perl -x patchlevel.h \
 	'Fedora Patch117: Update Digest::SHA to %{Digest_SHA_version}' \
 	'Fedora Patch117: Update module autodie to %{autodie_version}' \
 	'Fedora Patch201: Fedora uses links instead of lynx' \
 	'Fedora Patch202: Fix CGI::escape to work with all strings' \
 	%{nil}
 rm patchlevel.bak
@ -1880,6 +1894,11 @@ TMPDIR="$PWD/tmp" make test
 # Old changelog entries are preserved in CVS.
 %changelog
 * Tue Apr  7 2009 Stepan Kasal <skasal@redhat.com> - 4:5.10.0-66
 - fix CGI::escape for all strings (#472571)
 - perl-CGI-t-util-58.patch: Do not distort lib/CGI/t/util-58.t
  http://rt.perl.org/rt3/Ticket/Display.html?id=64502
 * Fri Mar 27 2009 Stepan Kasal <skasal@redhat.com> - 4:5.10.0-65
 - Move the gargantuan Changes* collection to -devel (#492605)