From b7fc82093961f282b986a62c582b4ca2fcc303db Mon Sep 17 00:00:00 2001
From: Pali
Date: Tue, 16 Aug 2016 19:07:31 +0200
Subject: [PATCH] Encode::utf8: Check for overflowed and overlong UTF-8 sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Petr Písař
---
 Encode.xs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Encode.xs b/Encode.xs
index 60de62c..1906f0c 100644
--- a/Encode.xs
+++ b/Encode.xs
@@ -331,6 +331,13 @@ strict_utf8(pTHX_ SV* sv)
 
 #define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c))
 
+#ifndef UTF_ACCUMULATION_OVERFLOW_MASK
+#ifndef CHARBITS
+#define CHARBITS CHAR_BIT
+#endif
+#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT))
+#endif
+
 /*
  * Convert non strict utf8 sequence of len >= 2 to unicode codepoint
  */
@@ -339,6 +346,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
 {
     UV uv;
     U8 *ptr = s;
+    bool overflowed = 0;
 
     uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len);
 
@@ -350,11 +358,17 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
             *rlen = s-ptr;
             return 0;
         }
+        if (uv & UTF_ACCUMULATION_OVERFLOW_MASK)
+            overflowed = 1;
         uv = UTF8_ACCUMULATE(uv, *s);
         s++;
     }
 
     *rlen = s-ptr;
+
+    if (overflowed || *rlen > (STRLEN)UNISKIP(uv))
+        return 0;
+
     return uv;
 }
 
-- 
2.7.4