107 lines
3.6 KiB
Diff
107 lines
3.6 KiB
Diff
From 5361c2b7f48599718cdecbe50c5fdd88b28ffd79 Mon Sep 17 00:00:00 2001
|
|
From: Toddr Bot <toddbot@rinaldo.us>
|
|
Date: Mon, 16 Mar 2026 20:55:31 +0000
|
|
Subject: [PATCH] Fix buffer overflow in parse_stream when filehandle has :utf8
|
|
layer
|
|
|
|
When a filehandle has a :utf8 PerlIO layer, Perl's read() returns
|
|
decoded characters, but SvPV() gives back the UTF-8 byte
|
|
representation, which can be larger than the pre-allocated XML buffer.
|
|
Previously this caused heap corruption (double free / buffer overflow),
|
|
and a later workaround (BUFSIZE * 6 + croak) prevented the corruption
|
|
but still aborted the parse with a croak().
|
|
|
|
Fix by re-obtaining the expat buffer at the actual byte size when the
|
|
read produces more bytes than initially allocated. This handles UTF-8
|
|
streams gracefully without wasting memory on an oversized buffer.
|
|
|
|
Fixes https://github.com/cpan-authors/XML-Parser/issues/64
|
|
(migrated from rt.cpan.org #19859)
|
|
|
|
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
---
|
|
Expat/Expat.xs | 15 +++++++++++----
|
|
t/utf8_stream.t | 40 ++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 51 insertions(+), 4 deletions(-)
|
|
create mode 100644 t/utf8_stream.t
|
|
|
|
diff --git a/Expat/Expat.xs b/Expat/Expat.xs
|
|
index 32fdce5..3cd1154 100644
|
|
--- a/Expat/Expat.xs
|
|
+++ b/Expat/Expat.xs
|
|
@@ -343,8 +343,8 @@ parse_stream(XML_Parser parser, SV * ioref)
|
|
}
|
|
else {
|
|
tbuff = newSV(0);
|
|
- tsiz = newSViv(BUFSIZE); /* in UTF-8 characters */
|
|
- buffsize = BUFSIZE * 6; /* in bytes that encode an UTF-8 string */
|
|
+ tsiz = newSViv(BUFSIZE);
|
|
+ buffsize = BUFSIZE;
|
|
}
|
|
|
|
while (! done)
|
|
@@ -387,8 +387,15 @@ parse_stream(XML_Parser parser, SV * ioref)
|
|
|
|
tb = SvPV(tbuff, br);
|
|
if (br > 0) {
|
|
- if (br > buffsize)
|
|
- croak("The input buffer is not large enough for read UTF-8 decoded string");
|
|
+ if (br > buffsize) {
|
|
+ /* The byte count from SvPV can exceed buffsize when the
|
|
+ filehandle has a :utf8 layer, since Perl reads buffsize
|
|
+ characters but multi-byte UTF-8 chars produce more bytes.
|
|
+ Re-obtain the buffer at the required size. */
|
|
+ buffer = XML_GetBuffer(parser, br);
|
|
+ if (! buffer)
|
|
+ croak("Ran out of memory for input buffer");
|
|
+ }
|
|
Copy(tb, buffer, br, char);
|
|
} else
|
|
done = 1;
|
|
diff --git a/t/utf8_stream.t b/t/utf8_stream.t
|
|
new file mode 100644
|
|
index 0000000..a7e55f7
|
|
--- /dev/null
|
|
+++ b/t/utf8_stream.t
|
|
@@ -0,0 +1,40 @@
|
|
+BEGIN { print "1..2\n"; }
|
|
+END { print "not ok 1\n" unless $loaded; }
|
|
+use XML::Parser;
|
|
+$loaded = 1;
|
|
+print "ok 1\n";
|
|
+
|
|
+################################################################
|
|
+# Test parsing from a filehandle with :utf8 layer
|
|
+# Regression test for rt.cpan.org #19859 / GitHub issue #64
|
|
+# A UTF-8 stream caused buffer overflow because SvPV byte count
|
|
+# could exceed the pre-allocated XML_GetBuffer size.
|
|
+
|
|
+use File::Temp qw(tempfile);
|
|
+
|
|
+# Create a temp file with UTF-8 XML content containing multi-byte chars
|
|
+my ($fh, $tmpfile) = tempfile(UNLINK => 1);
|
|
+binmode($fh, ':raw');
|
|
+# Write raw UTF-8 bytes: XML with Chinese characters (3 bytes each in UTF-8)
|
|
+# U+4E16 U+754C (世界 = "world") repeated to create substantial multi-byte content
|
|
+my $body = "\xe4\xb8\x96\xe7\x95\x8c" x 20000; # 120000 bytes / 40000 chars of 3-byte UTF-8
|
|
+print $fh qq(<?xml version="1.0" encoding="UTF-8"?>\n<doc>$body</doc>\n);
|
|
+close($fh);
|
|
+
|
|
+my $text = '';
|
|
+my $parser = XML::Parser->new(
|
|
+ Handlers => {
|
|
+ Char => sub { $text .= $_[1]; },
|
|
+ }
|
|
+);
|
|
+
|
|
+# Open with :utf8 layer - this is what triggers the bug
|
|
+open(my $in, '<:utf8', $tmpfile) or die "Cannot open $tmpfile: $!";
|
|
+eval { $parser->parse($in); };
|
|
+close($in);
|
|
+
|
|
+if ($@ eq '' && length($text) > 0) {
|
|
+ print "ok 2\n";
|
|
+} else {
|
|
+ print "not ok 2 # $@\n";
|
|
+}
|