expat/SOURCES/expat-2.2.10-Protect-against-malicious-namespace-declarations.patch

commit 5c47ae80738d0985babf06a023b3845169682064
Author: Tomas Korbar <tkorbar@redhat.com>
Date:   Mon Mar 14 10:22:37 2022 +0100

    Protect against malicious namespace declarations

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 5c3f573..901abbf 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -638,8 +638,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
 
 XML_Parser XMLCALL
 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
-  XML_Char tmp[2];
-  *tmp = nsSep;
+  XML_Char tmp[2] = {nsSep, 0};
   return XML_ParserCreate_MM(encodingName, NULL, tmp);
 }
 
@@ -1253,8 +1252,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
      would be otherwise.
   */
   if (parser->m_ns) {
-    XML_Char tmp[2];
-    *tmp = parser->m_namespaceSeparator;
+    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
   } else {
     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
@@ -3526,6 +3524,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
   return XML_ERROR_NONE;
 }
 
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+  // For the RFC 3986 ANBF grammar see
+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+  switch (candidate) {
+  // From rule "ALPHA" (uppercase half)
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+
+  // From rule "ALPHA" (lowercase half)
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+
+  // From rule "DIGIT"
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+
+  // From rule "pct-encoded"
+  case '%':
+
+  // From rule "unreserved"
+  case '-':
+  case '.':
+  case '_':
+  case '~':
+
+  // From rule "gen-delims"
+  case ':':
+  case '/':
+  case '?':
+  case '#':
+  case '[':
+  case ']':
+  case '@':
+
+  // From rule "sub-delims"
+  case '!':
+  case '$':
+  case '&':
+  case '\'':
+  case '(':
+  case ')':
+  case '*':
+  case '+':
+  case ',':
+  case ';':
+  case '=':
+    return XML_TRUE;
+
+  default:
+    return XML_FALSE;
+  }
+}
+
 /* addBinding() overwrites the value of prefix->binding without checking.
    Therefore one must keep track of the old value outside of addBinding().
 */
@@ -3581,6 +3690,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
     if (! mustBeXML && isXMLNS
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
+
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
+    //       namespaces specification) we have to at least make sure, that
+    //       the application on top of Expat (that is likely splitting expanded
+    //       element names ("qualified names") of form
+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+    //       in its element handler code) cannot be confused by an attacker
+    //       putting additional namespace separator characters into namespace
+    //       declarations.  That would be ambiguous and not to be expected.
+    //
+    //       While the HTML API docs of function XML_ParserCreateNS have been
+    //       advising against use of a namespace separator character that can
+    //       appear in a URI for >20 years now, some widespread applications
+    //       are using URI characters (':' (colon) in particular) for a
+    //       namespace separator, in practice.  To keep these applications
+    //       functional, we only reject namespaces URIs containing the
+    //       application-chosen namespace separator if the chosen separator
+    //       is a non-URI character with regard to RFC 3986.
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+        && ! is_rfc3986_uri_char(uri[len])) {
+      return XML_ERROR_SYNTAX;
+    }
   }
   isXML = isXML && len == xmlLen;
   isXMLNS = isXMLNS && len == xmlnsLen;
diff --git a/tests/runtests.c b/tests/runtests.c
index f03e008..40172d2 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -7233,6 +7233,37 @@ START_TEST(test_ns_double_colon_doctype) {
 }
 END_TEST
 
+START_TEST(test_ns_separator_in_uri) {
+  struct test_case {
+    enum XML_Status expectedStatus;
+    const char *doc;
+    XML_Char namesep;
+  };
+  struct test_case cases[] = {
+      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
+      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
+      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
+  };
+
+  size_t i = 0;
+  size_t failCount = 0;
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
+    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
+    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
+                  /*isFinal*/ XML_TRUE)
+        != cases[i].expectedStatus) {
+      failCount++;
+    }
+    XML_ParserFree(parser);
+  }
+
+  if (failCount) {
+    fail("Namespace separator handling is broken");
+  }
+}
+END_TEST
+
 /* Control variable; the number of times duff_allocator() will successfully
  * allocate */
 #define ALLOC_ALWAYS_SUCCEED (-1)
@@ -11527,6 +11558,7 @@ make_suite(void) {
   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
+  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
 
   suite_add_tcase(s, tc_misc);
   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
import expat-2.2.10-12.el9_0 2022-05-17 10:32:24 +00:00			`commit 5c47ae80738d0985babf06a023b3845169682064`
			`Author: Tomas Korbar <tkorbar@redhat.com>`
			`Date: Mon Mar 14 10:22:37 2022 +0100`

			`Protect against malicious namespace declarations`

			`diff --git a/lib/xmlparse.c b/lib/xmlparse.c`
			`index 5c3f573..901abbf 100644`
			`--- a/lib/xmlparse.c`
			`+++ b/lib/xmlparse.c`
			`@@ -638,8 +638,7 @@ XML_ParserCreate(const XML_Char *encodingName) {`

			`XML_Parser XMLCALL`
			`XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {`
			`- XML_Char tmp[2];`
			`- *tmp = nsSep;`
			`+ XML_Char tmp[2] = {nsSep, 0};`
			`return XML_ParserCreate_MM(encodingName, NULL, tmp);`
			`}`

			`@@ -1253,8 +1252,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,`
			`would be otherwise.`
			`*/`
			`if (parser->m_ns) {`
			`- XML_Char tmp[2];`
			`- *tmp = parser->m_namespaceSeparator;`
			`+ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};`
			`parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);`
			`} else {`
			`parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);`
			`@@ -3526,6 +3524,117 @@ storeAtts(XML_Parser parser, const ENCODING enc, const char attStr,`
			`return XML_ERROR_NONE;`
			`}`

			`+static XML_Bool`
			`+is_rfc3986_uri_char(XML_Char candidate) {`
			`+ // For the RFC 3986 ANBF grammar see`
			`+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A`
			`+`
			`+ switch (candidate) {`
			`+ // From rule "ALPHA" (uppercase half)`
			`+ case 'A':`
			`+ case 'B':`
			`+ case 'C':`
			`+ case 'D':`
			`+ case 'E':`
			`+ case 'F':`
			`+ case 'G':`
			`+ case 'H':`
			`+ case 'I':`
			`+ case 'J':`
			`+ case 'K':`
			`+ case 'L':`
			`+ case 'M':`
			`+ case 'N':`
			`+ case 'O':`
			`+ case 'P':`
			`+ case 'Q':`
			`+ case 'R':`
			`+ case 'S':`
			`+ case 'T':`
			`+ case 'U':`
			`+ case 'V':`
			`+ case 'W':`
			`+ case 'X':`
			`+ case 'Y':`
			`+ case 'Z':`
			`+`
			`+ // From rule "ALPHA" (lowercase half)`
			`+ case 'a':`
			`+ case 'b':`
			`+ case 'c':`
			`+ case 'd':`
			`+ case 'e':`
			`+ case 'f':`
			`+ case 'g':`
			`+ case 'h':`
			`+ case 'i':`
			`+ case 'j':`
			`+ case 'k':`
			`+ case 'l':`
			`+ case 'm':`
			`+ case 'n':`
			`+ case 'o':`
			`+ case 'p':`
			`+ case 'q':`
			`+ case 'r':`
			`+ case 's':`
			`+ case 't':`
			`+ case 'u':`
			`+ case 'v':`
			`+ case 'w':`
			`+ case 'x':`
			`+ case 'y':`
			`+ case 'z':`
			`+`
			`+ // From rule "DIGIT"`
			`+ case '0':`
			`+ case '1':`
			`+ case '2':`
			`+ case '3':`
			`+ case '4':`
			`+ case '5':`
			`+ case '6':`
			`+ case '7':`
			`+ case '8':`
			`+ case '9':`
			`+`
			`+ // From rule "pct-encoded"`
			`+ case '%':`
			`+`
			`+ // From rule "unreserved"`
			`+ case '-':`
			`+ case '.':`
			`+ case '_':`
			`+ case '~':`
			`+`
			`+ // From rule "gen-delims"`
			`+ case ':':`
			`+ case '/':`
			`+ case '?':`
			`+ case '#':`
			`+ case '[':`
			`+ case ']':`
			`+ case '@':`
			`+`
			`+ // From rule "sub-delims"`
			`+ case '!':`
			`+ case '$':`
			`+ case '&':`
			`+ case '\'':`
			`+ case '(':`
			`+ case ')':`
			`+ case '*':`
			`+ case '+':`
			`+ case ',':`
			`+ case ';':`
			`+ case '=':`
			`+ return XML_TRUE;`
			`+`
			`+ default:`
			`+ return XML_FALSE;`
			`+ }`
			`+}`
			`+`
			`/* addBinding() overwrites the value of prefix->binding without checking.`
			`Therefore one must keep track of the old value outside of addBinding().`
			`*/`
			`@@ -3581,6 +3690,29 @@ addBinding(XML_Parser parser, PREFIX prefix, const ATTRIBUTE_ID attId,`
			`if (! mustBeXML && isXMLNS`
			`&& (len > xmlnsLen \|\| uri[len] != xmlnsNamespace[len]))`
			`isXMLNS = XML_FALSE;`
			`+`
			`+ // NOTE: While Expat does not validate namespace URIs against RFC 3986`
			`+ // today (and is not REQUIRED to do so with regard to the XML 1.0`
			`+ // namespaces specification) we have to at least make sure, that`
			`+ // the application on top of Expat (that is likely splitting expanded`
			`+ // element names ("qualified names") of form`
			`+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces`
			`+ // in its element handler code) cannot be confused by an attacker`
			`+ // putting additional namespace separator characters into namespace`
			`+ // declarations. That would be ambiguous and not to be expected.`
			`+ //`
			`+ // While the HTML API docs of function XML_ParserCreateNS have been`
			`+ // advising against use of a namespace separator character that can`
			`+ // appear in a URI for >20 years now, some widespread applications`
			`+ // are using URI characters (':' (colon) in particular) for a`
			`+ // namespace separator, in practice. To keep these applications`
			`+ // functional, we only reject namespaces URIs containing the`
			`+ // application-chosen namespace separator if the chosen separator`
			`+ // is a non-URI character with regard to RFC 3986.`
			`+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)`
			`+ && ! is_rfc3986_uri_char(uri[len])) {`
			`+ return XML_ERROR_SYNTAX;`
			`+ }`
			`}`
			`isXML = isXML && len == xmlLen;`
			`isXMLNS = isXMLNS && len == xmlnsLen;`
			`diff --git a/tests/runtests.c b/tests/runtests.c`
			`index f03e008..40172d2 100644`
			`--- a/tests/runtests.c`
			`+++ b/tests/runtests.c`
			`@@ -7233,6 +7233,37 @@ START_TEST(test_ns_double_colon_doctype) {`
			`}`
			`END_TEST`

			`+START_TEST(test_ns_separator_in_uri) {`
			`+ struct test_case {`
			`+ enum XML_Status expectedStatus;`
			`+ const char *doc;`
			`+ XML_Char namesep;`
			`+ };`
			`+ struct test_case cases[] = {`
			`+ {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},`
			`+ {XML_STATUS_ERROR, "<doc xmlns='one two' />", XCS('\n')},`
			`+ {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},`
			`+ };`
			`+`
			`+ size_t i = 0;`
			`+ size_t failCount = 0;`
			`+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {`
			`+ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);`
			`+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);`
			`+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),`
			`+ /isFinal/ XML_TRUE)`
			`+ != cases[i].expectedStatus) {`
			`+ failCount++;`
			`+ }`
			`+ XML_ParserFree(parser);`
			`+ }`
			`+`
			`+ if (failCount) {`
			`+ fail("Namespace separator handling is broken");`
			`+ }`
			`+}`
			`+END_TEST`
			`+`
			`/* Control variable; the number of times duff_allocator() will successfully`
			`* allocate */`
			`#define ALLOC_ALWAYS_SUCCEED (-1)`
			`@@ -11527,6 +11558,7 @@ make_suite(void) {`
			`tcase_add_test(tc_namespace, test_ns_utf16_doctype);`
			`tcase_add_test(tc_namespace, test_ns_invalid_doctype);`
			`tcase_add_test(tc_namespace, test_ns_double_colon_doctype);`
			`+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);`

			`suite_add_tcase(s, tc_misc);`
			`tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);`