commit fd5473ef5873048eadef344a1f16f71ad8eefe99 Author: Tomas Korbar Date: Mon Mar 14 12:17:41 2022 +0100 Protect against malicious namespace declarations diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 581b9a4..6f3510b 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -661,8 +661,7 @@ XML_ParserCreate(const XML_Char *encodingName) XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { - XML_Char tmp[2]; - *tmp = nsSep; + XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } @@ -1288,8 +1287,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, would be otherwise. */ if (parser->m_ns) { - XML_Char tmp[2]; - *tmp = parser->m_namespaceSeparator; + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); } else { @@ -3640,6 +3638,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ANBF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ @@ -3700,6 +3809,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (!mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; + + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure, that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespaces URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { + return XML_ERROR_SYNTAX; + } } isXML = isXML && len == xmlLen; isXMLNS = isXMLNS && len == xmlnsLen; diff --git a/tests/runtests.c b/tests/runtests.c index ecc6f47..eabd55d 100644 --- a/tests/runtests.c +++ b/tests/runtests.c @@ -7950,6 +7950,38 @@ START_TEST(test_ns_double_colon_doctype) } END_TEST +START_TEST(test_ns_separator_in_uri) { + struct test_case { + enum XML_Status expectedStatus; + const char *doc; + XML_Char namesep; + }; + struct test_case cases[] = { + {XML_STATUS_OK, "", XCS('\n')}, + {XML_STATUS_ERROR, "", XCS('\n')}, + {XML_STATUS_OK, "", XCS(':')}, + }; + + size_t i = 0; + size_t failCount = 0; + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep); + XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); + if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), + /*isFinal*/ XML_TRUE) + != cases[i].expectedStatus) { + failCount++; + } + XML_ParserFree(parser); + } + + if (failCount) { + fail("Namespace separator handling is broken"); + } +} +END_TEST + + /* Control variable; the number of times duff_allocator() will successfully allocate */ #define ALLOC_ALWAYS_SUCCEED (-1) #define REALLOC_ALWAYS_SUCCEED (-1) @@ -12290,6 +12322,7 @@ make_suite(void) tcase_add_test(tc_namespace, test_ns_utf16_doctype); tcase_add_test(tc_namespace, test_ns_invalid_doctype); tcase_add_test(tc_namespace, test_ns_double_colon_doctype); + tcase_add_test(tc_namespace, test_ns_separator_in_uri); suite_add_tcase(s, tc_misc); tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);