import expat-2.2.10-12.el9_0

2022-04-05 07:05:50 -04:00 · 2022-04-05 07:05:50 -04:00 · 13e18f5216
commit 13e18f5216
parent 75ddd38220
4 changed files with 672 additions and 1 deletions
--- a/SOURCES/expat-2.2.10-Add-missing-validation-of-encoding.patch
+++ b/SOURCES/expat-2.2.10-Add-missing-validation-of-encoding.patch
@ -0,0 +1,281 @@
 From ee2a5b50e7d1940ba8745715b62ceb9efd3a96da Mon Sep 17 00:00:00 2001
 From: Sebastian Pipping <sebastian@pipping.org>
 Date: Tue, 8 Feb 2022 17:37:14 +0100
 Subject: [PATCH 1/5] lib: Drop unused macro UTF8_GET_NAMING
 ---
 expat/lib/xmltok.c | 5 -----
 1 file changed, 5 deletions(-)
 diff --git a/lib/xmltok.c b/lib/xmltok.c
 index a72200e8..3bddf125 100644
 --- a/lib/xmltok.c
 +++ b/lib/xmltok.c
@@ -98,11 +98,6 @@
         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
    & (1u << (((byte)[2]) & 0x1F)))
 -#define UTF8_GET_NAMING(pages, p, n)                                           \
 -  ((n) == 2                                                                    \
 -       ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))                   \
 -       : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
 -
 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
    with the additional restriction of not allowing the Unicode
 From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001
 From: Sebastian Pipping <sebastian@pipping.org>
 Date: Tue, 8 Feb 2022 04:32:20 +0100
 Subject: [PATCH 2/5] lib: Add missing validation of encoding (CVE-2022-25235)
 ---
 expat/lib/xmltok_impl.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
 diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
 index 0430591b..64a3b2c1 100644
 --- a/lib/xmltok_impl.c
 +++ b/lib/xmltok_impl.c
@@ -69,7 +69,7 @@
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
 -    if (! IS_NAME_CHAR(enc, ptr, n)) {                                         \
 +    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
       *nextTokPtr = ptr;                                                       \
       return XML_TOK_INVALID;                                                  \
     }                                                                          \
@@ -98,7 +98,7 @@
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
 -    if (! IS_NMSTRT_CHAR(enc, ptr, n)) {                                       \
 +    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
       *nextTokPtr = ptr;                                                       \
       return XML_TOK_INVALID;                                                  \
     }                                                                          \
@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
 +    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
 +      *nextTokPtr = ptr;                                                       \
 +      return XML_TOK_INVALID;                                                  \
 +    }                                                                          \
     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
       ptr += n;                                                                \
       tok = XML_TOK_NAME;                                                      \
 From c85a3025e7a1be086dc34e7559fbc543914d047f Mon Sep 17 00:00:00 2001
 From: Sebastian Pipping <sebastian@pipping.org>
 Date: Wed, 9 Feb 2022 01:00:38 +0100
 Subject: [PATCH 3/5] lib: Add comments to BT_LEAD* cases where encoding has
 already been validated
 ---
 expat/lib/xmltok_impl.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
 diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
 index 64a3b2c1..84ff35f9 100644
 --- a/lib/xmltok_impl.c
 +++ b/lib/xmltok_impl.c
@@ -1274,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
 -    ptr += n;                                                                  \
 +    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1343,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
 -    ptr += n;                                                                  \
 +    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1522,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
       state = inName;                                                          \
     }
 #  define LEAD_CASE(n)                                                         \
 -  case BT_LEAD##n:                                                             \
 +  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
     START_NAME ptr += (n - MINBPC(enc));                                       \
     break;
       LEAD_CASE(2)
@@ -1734,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
 -    ptr += n;                                                                  \
 +    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1779,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
 -    ptr += n;                                                                  \
 +    ptr += n; /* NOTE: The encoding has already been validated. */             \
     pos->columnNumber++;                                                       \
     break;
       LEAD_CASE(2)
 From 6a5510bc6b7efe743356296724e0b38300f05379 Mon Sep 17 00:00:00 2001
 From: Sebastian Pipping <sebastian@pipping.org>
 Date: Tue, 8 Feb 2022 04:06:21 +0100
 Subject: [PATCH 4/5] tests: Cover missing validation of encoding
 (CVE-2022-25235)
 ---
 expat/tests/runtests.c | 109 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 diff --git a/tests/runtests.c b/tests/runtests.c
 index bc5344b1..9b155b82 100644
 --- a/tests/runtests.c
 +++ b/tests/runtests.c
@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
 }
 END_TEST
 +START_TEST(test_utf8_in_start_tags) {
 +  struct test_case {
 +    bool goodName;
 +    bool goodNameStart;
 +    const char *tagName;
 +  };
 +
 +  // The idea with the tests below is this:
 +  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
 +  // go to isNever and are hence not a concern.
 +  //
 +  // We start with a character that is a valid name character
 +  // (or even name-start character, see XML 1.0r4 spec) and then we flip
 +  // single bits at places where (1) the result leaves the UTF-8 encoding space
 +  // and (2) we stay in the same n-byte sequence family.
 +  //
 +  // The flipped bits are highlighted in angle brackets in comments,
 +  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
 +  // the most significant bit to 1 to leave UTF-8 encoding space.
 +  struct test_case cases[] = {
 +      // 1-byte UTF-8: [0xxx xxxx]
 +      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
 +      {false, false, "\xBA"}, // [<1>011 1010]
 +      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
 +      {false, false, "\xB9"}, // [<1>011 1001]
 +
 +      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
 +      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
 +                                  // Arabic small waw U+06E5
 +      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
 +      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
 +      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
 +      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
 +                                  // combining char U+0301
 +      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
 +      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
 +      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
 +
 +      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
 +      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
 +                                      // Devanagari Letter A U+0905
 +      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
 +      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
 +      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
 +      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
 +      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
 +      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
 +                                      // combining char U+0901
 +      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
 +      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
 +      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
 +      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
 +      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
 +  };
 +  const bool atNameStart[] = {true, false}; // sequence first in name, or not
 +
 +  size_t i = 0;
 +  char doc[1024];
 +  size_t failCount = 0;
 +  // Parse "<SEQ><!--" and "<aSEQ><!--" for every case, counting mismatches.
 +  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
 +    size_t j = 0;
 +    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
 +      const bool expectedSuccess
 +          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
 +      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
 +      XML_Parser parser = XML_ParserCreate(NULL);
 +
 +      const enum XML_Status status
 +          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
 +      // Pass only on the expected outcome; any error must be INVALID_TOKEN.
 +      bool success = true;
 +      if ((status == XML_STATUS_OK) != expectedSuccess) {
 +        success = false;
 +      }
 +      if ((status == XML_STATUS_ERROR)
 +          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
 +        success = false;
 +      }
 +
 +      if (! success) {
 +        fprintf(
 +            stderr,
 +            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
 +            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
 +            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
 +        failCount++;
 +      }
 +
 +      XML_ParserFree(parser);
 +    }
 +  }
 +
 +  if (failCount > 0) {
 +    fail("UTF-8 regression detected");
 +  }
 +}
 +END_TEST
 +
 /* Test trailing spaces in elements are accepted */
 static void XMLCALL
 record_element_end_handler(void *userData, const XML_Char *name) {
@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
 }
 END_TEST
 +START_TEST(test_bad_doctype_utf8) { // malformed 2-byte sequence in DOCTYPE
 +  const char *text = "<!DOCTYPE \xDB\x25"
 +                     "doc><doc/>"; // [1101 1011] [<0>010 0101]: 2nd byte bad
 +  expect_failure(text, XML_ERROR_INVALID_TOKEN,
 +                 "Invalid UTF-8 in DOCTYPE not faulted");
 +}
 +END_TEST
 +
 START_TEST(test_bad_doctype_utf16) {
   const char text[] =
       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
@@ -11870,6 +11977,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
 +  tcase_add_test(tc_basic, test_utf8_in_start_tags);
   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
   tcase_add_test(tc_basic, test_utf16_attribute);
   tcase_add_test(tc_basic, test_utf16_second_attr);
@@ -11878,6 +11986,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
   tcase_add_test(tc_basic, test_bad_doctype);
 +  tcase_add_test(tc_basic, test_bad_doctype_utf8);
   tcase_add_test(tc_basic, test_bad_doctype_utf16);
   tcase_add_test(tc_basic, test_bad_doctype_plus);
   tcase_add_test(tc_basic, test_bad_doctype_star);
--- a/SOURCES/expat-2.2.10-Prevent-integer-overflow-in-storeRawNames.patch
+++ b/SOURCES/expat-2.2.10-Prevent-integer-overflow-in-storeRawNames.patch
@ -0,0 +1,139 @@
 From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
 From: Samanta Navarro <ferivoz@riseup.net>
 Date: Tue, 15 Feb 2022 11:55:46 +0000
 Subject: [PATCH] Prevent integer overflow in storeRawNames
 It is possible to use an integer overflow in storeRawNames for out of
 boundary heap writes. Default configuration is affected. If compiled
 with XML_UNICODE then the attack does not work. Compiling with
 -fsanitize=address confirms the following proof of concept.
 The problem can be exploited by abusing the m_buffer expansion logic.
 Even though the initial size of m_buffer is a power of two, eventually
 it can end up a little bit lower, thus allowing allocations very close
 to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
 names can be parsed which are almost INT_MAX in size.
 Unfortunately (from an attacker point of view) INT_MAX/2 is also a
 limitation in string pools. Having a tag name of INT_MAX/2 characters
 or more is not possible.
 Expat can convert between different encodings. UTF-16 documents which
 contain only ASCII representable characters are twice as large as their
 ASCII encoded counter-parts.
 The proof of concept works by taking these three considerations into
 account:
 1. Move the m_buffer size slightly below a power of two by having a
   short root node <a>. This allows the m_buffer to grow very close
   to INT_MAX.
 2. The string pooling forbids tag names longer than or equal to
   INT_MAX/2, so keep the attack tag name smaller than that.
 3. To be able to still overflow INT_MAX even though the name is
   limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
   which only contains ASCII characters. UTF-16 always stores two
   bytes per character while the tag name is converted to using only
   one. Our attack node byte count must be a bit higher than
   2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
   in sum can overflow INT_MAX.
 Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
 without running into INT_MAX boundary check. The string pooling is
 able to store INT_MAX/3 as tag name because the amount is below
 INT_MAX/2 limitation. And creating the sum of both eventually overflows
 in storeRawNames.
 Proof of Concept:
 1. Compile expat with -fsanitize=address.
 2. Create Proof of Concept binary which iterates through input
   file 16 MB at once for better performance and easier integer
   calculations:
 ```
 cat > poc.c << EOF
 #include <err.h>
 #include <expat.h>
 #include <stdlib.h>
 #include <stdio.h>
 #define CHUNK (16 * 1024 * 1024)
 int main(int argc, char *argv[]) {
   XML_Parser parser;
   FILE *fp;
   char *buf;
   int i;
   if (argc != 2)
     errx(1, "usage: poc file.xml");
   if ((parser = XML_ParserCreate(NULL)) == NULL)
     errx(1, "failed to create expat parser");
   if ((fp = fopen(argv[1], "r")) == NULL) {
     XML_ParserFree(parser);
     err(1, "failed to open file");
   }
   if ((buf = malloc(CHUNK)) == NULL) {
     fclose(fp);
     XML_ParserFree(parser);
     err(1, "failed to allocate buffer");
   }
   i = 0;
   while (fread(buf, CHUNK, 1, fp) == 1) {
     printf("iteration %d: XML_Parse returns %d\n", ++i,
       XML_Parse(parser, buf, CHUNK, XML_FALSE));
   }
   free(buf);
   fclose(fp);
   XML_ParserFree(parser);
   return 0;
 }
 EOF
 gcc -fsanitize=address -lexpat -o poc poc.c
 ```
 3. Construct specially prepared UTF-16 XML file:
 ```
 dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
 echo -n '<a><' | dd conv=notrunc of=poc-utf8.xml
 echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
 iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
 ```
 4. Run proof of concept:
 ```
 ./poc poc-utf16.xml
 ```
 ---
 expat/lib/xmlparse.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
 diff --git a/lib/xmlparse.c b/lib/xmlparse.c
 index 4b43e613..f34d6ab5 100644
 --- a/lib/xmlparse.c
 +++ b/lib/xmlparse.c
@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
   while (tag) {
     int bufSize;
     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
 +    size_t rawNameLen;
     char *rawNameBuf = tag->buf + nameLen;
     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
        at the first entry that has already been copied; everything
@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
     /* For re-use purposes we need to ensure that the
        size of tag->buf is a multiple of sizeof(XML_Char).
     */
 -    bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
 +    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
 +    /* Detect and prevent integer overflow. */
 +    if (rawNameLen > (size_t)INT_MAX - nameLen)
 +      return XML_FALSE;
 +    bufSize = nameLen + (int)rawNameLen;
     if (bufSize > tag->bufEnd - tag->buf) {
       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
       if (temp == NULL)
--- a/SOURCES/expat-2.2.10-Protect-against-malicious-namespace-declarations.patch
+++ b/SOURCES/expat-2.2.10-Protect-against-malicious-namespace-declarations.patch
@ -0,0 +1,228 @@
 commit 5c47ae80738d0985babf06a023b3845169682064
 Author: Tomas Korbar <tkorbar@redhat.com>
 Date:   Mon Mar 14 10:22:37 2022 +0100
    Protect against malicious namespace declarations
 diff --git a/lib/xmlparse.c b/lib/xmlparse.c
 index 5c3f573..901abbf 100644
 --- a/lib/xmlparse.c
 +++ b/lib/xmlparse.c
@@ -638,8 +638,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
 XML_Parser XMLCALL
 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
 -  XML_Char tmp[2];
 -  *tmp = nsSep;
 +  XML_Char tmp[2] = {nsSep, 0};
   return XML_ParserCreate_MM(encodingName, NULL, tmp);
 }
@@ -1253,8 +1252,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
      would be otherwise.
   */
   if (parser->m_ns) {
 -    XML_Char tmp[2];
 -    *tmp = parser->m_namespaceSeparator;
 +    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
   } else {
     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
@@ -3526,6 +3524,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
   return XML_ERROR_NONE;
 }
 +static XML_Bool
 +is_rfc3986_uri_char(XML_Char candidate) {
 +  // XML_TRUE iff candidate is allowed by the RFC 3986 ABNF rules below, see
 +  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
 +
 +  switch (candidate) {
 +  // From rule "ALPHA" (uppercase half)
 +  case 'A':
 +  case 'B':
 +  case 'C':
 +  case 'D':
 +  case 'E':
 +  case 'F':
 +  case 'G':
 +  case 'H':
 +  case 'I':
 +  case 'J':
 +  case 'K':
 +  case 'L':
 +  case 'M':
 +  case 'N':
 +  case 'O':
 +  case 'P':
 +  case 'Q':
 +  case 'R':
 +  case 'S':
 +  case 'T':
 +  case 'U':
 +  case 'V':
 +  case 'W':
 +  case 'X':
 +  case 'Y':
 +  case 'Z':
 +
 +  // From rule "ALPHA" (lowercase half)
 +  case 'a':
 +  case 'b':
 +  case 'c':
 +  case 'd':
 +  case 'e':
 +  case 'f':
 +  case 'g':
 +  case 'h':
 +  case 'i':
 +  case 'j':
 +  case 'k':
 +  case 'l':
 +  case 'm':
 +  case 'n':
 +  case 'o':
 +  case 'p':
 +  case 'q':
 +  case 'r':
 +  case 's':
 +  case 't':
 +  case 'u':
 +  case 'v':
 +  case 'w':
 +  case 'x':
 +  case 'y':
 +  case 'z':
 +
 +  // From rule "DIGIT"
 +  case '0':
 +  case '1':
 +  case '2':
 +  case '3':
 +  case '4':
 +  case '5':
 +  case '6':
 +  case '7':
 +  case '8':
 +  case '9':
 +
 +  // From rule "pct-encoded"
 +  case '%':
 +
 +  // From rule "unreserved"
 +  case '-':
 +  case '.':
 +  case '_':
 +  case '~':
 +
 +  // From rule "gen-delims"
 +  case ':':
 +  case '/':
 +  case '?':
 +  case '#':
 +  case '[':
 +  case ']':
 +  case '@':
 +
 +  // From rule "sub-delims"
 +  case '!':
 +  case '$':
 +  case '&':
 +  case '\'':
 +  case '(':
 +  case ')':
 +  case '*':
 +  case '+':
 +  case ',':
 +  case ';':
 +  case '=':
 +    return XML_TRUE;
 +
 +  default:
 +    return XML_FALSE;
 +  }
 +}
 +
 /* addBinding() overwrites the value of prefix->binding without checking.
    Therefore one must keep track of the old value outside of addBinding().
 */
@@ -3581,6 +3690,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
     if (! mustBeXML && isXMLNS
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
 +
 +    // NOTE: While Expat does not validate namespace URIs against RFC 3986
 +    //       today (and is not REQUIRED to do so with regard to the XML 1.0
 +    //       namespaces specification) we have to at least make sure, that
 +    //       the application on top of Expat (that is likely splitting expanded
 +    //       element names ("qualified names") of form
 +    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
 +    //       in its element handler code) cannot be confused by an attacker
 +    //       putting additional namespace separator characters into namespace
 +    //       declarations.  That would be ambiguous and not to be expected.
 +    //
 +    //       While the HTML API docs of function XML_ParserCreateNS have been
 +    //       advising against use of a namespace separator character that can
 +    //       appear in a URI for >20 years now, some widespread applications
 +    //       are using URI characters (':' (colon) in particular) for a
 +    //       namespace separator, in practice.  To keep these applications
 +    //       functional, we only reject namespace URIs containing the
 +    //       application-chosen namespace separator if the chosen separator
 +    //       is a non-URI character with regard to RFC 3986.
 +    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
 +        && ! is_rfc3986_uri_char(uri[len])) {
 +      return XML_ERROR_SYNTAX;
 +    }
   }
   isXML = isXML && len == xmlLen;
   isXMLNS = isXMLNS && len == xmlnsLen;
 diff --git a/tests/runtests.c b/tests/runtests.c
 index f03e008..40172d2 100644
 --- a/tests/runtests.c
 +++ b/tests/runtests.c
@@ -7233,6 +7233,37 @@ START_TEST(test_ns_double_colon_doctype) {
 }
 END_TEST
 +START_TEST(test_ns_separator_in_uri) {
 +  struct test_case {
 +    enum XML_Status expectedStatus;
 +    const char *doc;
 +    XML_Char namesep; // separator handed to XML_ParserCreateNS
 +  };
 +  struct test_case cases[] = { // '\n' is no RFC 3986 URI char, ':' is one
 +      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
 +      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
 +      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
 +  };
 +
 +  size_t i = 0;
 +  size_t failCount = 0;
 +  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
 +    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
 +    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
 +    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
 +                  /*isFinal*/ XML_TRUE)
 +        != cases[i].expectedStatus) {
 +      failCount++; // keep going so that every case gets exercised
 +    }
 +    XML_ParserFree(parser);
 +  }
 +
 +  if (failCount) {
 +    fail("Namespace separator handling is broken");
 +  }
 +}
 +END_TEST
 +
 /* Control variable; the number of times duff_allocator() will successfully
  * allocate */
 #define ALLOC_ALWAYS_SUCCEED (-1)
@@ -11527,6 +11558,7 @@ make_suite(void) {
   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
 +  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
   suite_add_tcase(s, tc_misc);
   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
--- a/SPECS/expat.spec
+++ b/SPECS/expat.spec
@ -3,7 +3,7 @@
 Summary: An XML parser library
 Name: expat
 Version: %(echo %{unversion} | sed 's/_/./g')
-Release: 9%{?dist}
+Release: 12%{?dist}
 Source: https://github.com/libexpat/libexpat/archive/R_%{unversion}.tar.gz#/expat-%{version}.tar.gz
 URL: https://libexpat.github.io/
 License: MIT
@ -14,6 +14,9 @@ Patch1:	expat-2.2.10-Prevent-more-integer-overflows.patch
 Patch2:	expat-2.2.10-Prevent-integer-overflow-on-m_groupSize-in-function.patch
 Patch3:	expat-2.2.10-Detect-and-prevent-troublesome-left-shifts.patch
 Patch4:	expat-2.2.10-Detect-and-prevent-integer-overflow-in-XML_GetBuffer.patch
 Patch5: expat-2.2.10-Protect-against-malicious-namespace-declarations.patch
 Patch6: expat-2.2.10-Add-missing-validation-of-encoding.patch
 Patch7: expat-2.2.10-Prevent-integer-overflow-in-storeRawNames.patch
 %description
 This is expat, the C library for parsing XML, written by James Clark. Expat
@ -46,6 +49,9 @@ Install it if you need to link statically with expat.
 %patch2 -p1 -b .CVE-2021-46143
 %patch3 -p1 -b .CVE-2021-45960
 %patch4 -p1 -b .CVE-2022-23852
 %patch5 -p1 -b .CVE-2022-25236
 %patch6 -p1 -b .CVE-2022-25235
 %patch7 -p1 -b .CVE-2022-25315
 sed -i 's/install-data-hook/do-nothing-please/' lib/Makefile.am
 ./buildconf.sh
@ -83,6 +89,23 @@ make check
 %{_libdir}/lib*.a
 %changelog
 * Wed Mar 16 2022 Tomas Korbar <tkorbar@redhat.com> -  2.2.10-12
 - Build fix for CVE-2022-25236 in rhel-9.0.0
 - Related: CVE-2022-25236
 * Mon Mar 14 2022 Tomas Korbar <tkorbar@redhat.com> -  2.2.10-11
 - Improve fix for CVE-2022-25236
 - Related: CVE-2022-25236
 * Mon Feb 28 2022 Tomas Korbar <tkorbar@redhat.com> -  2.2.10-10
 - Fix multiple CVEs
 - CVE-2022-25236 expat: namespace-separator characters in "xmlns[:prefix]" attribute values can lead to arbitrary code execution
 - CVE-2022-25235 expat: malformed 2- and 3-byte UTF-8 sequences can lead to arbitrary code execution
 - CVE-2022-25315 expat: integer overflow in storeRawNames()
 - Resolves: CVE-2022-25236
 - Resolves: CVE-2022-25235
 - Resolves: CVE-2022-25315
 * Thu Feb 10 2022 Tomas Korbar <tkorbar@redhat.com> -  2.2.10-9
 - CVE-2022-23852 expat: integer overflow in function XML_GetBuffer
 - Resolves: CVE-2022-23852