From 6aee99f381cc5bdfb6e514ac1e82f5e7b0fa7e2d Mon Sep 17 00:00:00 2001
From: Rob Crittenden
Date: Fri, 25 Feb 2022 16:42:35 -0500
Subject: [PATCH 5/6] Add missing validation of encoding (CVE-2022-25235)

Backported from upstream https://github.com/libexpat/libexpat/pull/562

Resolves: #2058114
---
 lib/expat/xmltok/xmltok.c      | 21 +++++++++++++++------
 lib/expat/xmltok/xmltok_impl.c |  8 ++++++--
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/expat/xmltok/xmltok.c b/lib/expat/xmltok/xmltok.c
index 7b31fbb..3b0c950 100644
--- a/lib/expat/xmltok/xmltok.c
+++ b/lib/expat/xmltok/xmltok.c
@@ -61,12 +61,17 @@ We need 8 bits to index into pages, 3 bits to add to that index and
      ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
      : 0))
 
+#define UTF8_INVALID2(p) \
+  ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
+
 #define UTF8_INVALID3(p) \
-  ((*p) == 0xED \
-  ? (((p)[1] & 0x20) != 0) \
-  : ((*p) == 0xEF \
-     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
-     : 0))
+  (((p)[2] & 0x80) == 0 \
+   || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
+                                      : ((p)[2] & 0xC0) == 0xC0) \
+   || ((*p) == 0xE0 \
+       ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
+       : ((p)[1] & 0x80) == 0 \
+         || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
 
 #define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
 
@@ -104,7 +109,11 @@ int utf8_isNmstrt3(const ENCODING *enc ATTR_UNUSED, const char *p)
 
 #define utf8_isNmstrt4 isNever
 
-#define utf8_isInvalid2 isNever
+static
+int utf8_isInvalid2(const ENCODING *enc ATTR_UNUSED, const char *p)
+{
+  return UTF8_INVALID2((const unsigned char *)p);
+}
 
 static
 int utf8_isInvalid3(const ENCODING *enc ATTR_UNUSED, const char *p)
diff --git a/lib/expat/xmltok/xmltok_impl.c b/lib/expat/xmltok/xmltok_impl.c
index d035527..bae79b9 100644
--- a/lib/expat/xmltok/xmltok_impl.c
+++ b/lib/expat/xmltok/xmltok_impl.c
@@ -43,7 +43,7 @@ See the file copying.txt for copying permission.
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
-     if (!IS_NAME_CHAR(enc, ptr, n)) { \
+     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
@@ -71,7 +71,7 @@ See the file copying.txt for copying permission.
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
-     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
+     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
@@ -1168,6 +1168,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
+    if (IS_INVALID_CHAR(enc, ptr, n)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
       ptr += n; \
       tok = XML_TOK_NAME; \
-- 
2.31.1