62 lines
1.5 KiB
Diff
62 lines
1.5 KiB
Diff
From b7fc82093961f282b986a62c582b4ca2fcc303db Mon Sep 17 00:00:00 2001
|
|
From: Pali <pali@cpan.org>
|
|
Date: Tue, 16 Aug 2016 19:07:31 +0200
|
|
Subject: [PATCH] Encode::utf8: Check for overflowed and overlong UTF-8 sequences
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
Encode.xs | 14 ++++++++++++++
|
|
1 file changed, 14 insertions(+)
|
|
|
|
diff --git a/Encode.xs b/Encode.xs
|
|
index 60de62c..1906f0c 100644
|
|
--- a/Encode.xs
|
|
+++ b/Encode.xs
|
|
@@ -331,6 +331,13 @@ strict_utf8(pTHX_ SV* sv)
|
|
|
|
#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c))
|
|
|
|
+#ifndef UTF_ACCUMULATION_OVERFLOW_MASK
|
|
+#ifndef CHARBITS
|
|
+#define CHARBITS CHAR_BIT
|
|
+#endif
|
|
+#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT))
|
|
+#endif
|
|
+
|
|
/*
|
|
* Convert non strict utf8 sequence of len >= 2 to unicode codepoint
|
|
*/
|
|
@@ -339,6 +346,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
|
|
{
|
|
UV uv;
|
|
U8 *ptr = s;
|
|
+ bool overflowed = 0;
|
|
|
|
uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len);
|
|
|
|
@@ -350,11 +358,17 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
|
|
*rlen = s-ptr;
|
|
return 0;
|
|
}
|
|
+ if (uv & UTF_ACCUMULATION_OVERFLOW_MASK)
|
|
+ overflowed = 1;
|
|
uv = UTF8_ACCUMULATE(uv, *s);
|
|
s++;
|
|
}
|
|
|
|
*rlen = s-ptr;
|
|
+
|
|
+ if (overflowed || *rlen > (STRLEN)UNISKIP(uv))
|
|
+ return 0;
|
|
+
|
|
return uv;
|
|
}
|
|
|
|
--
|
|
2.7.4
|
|
|