diff --git a/expat-2.2.5-CVE-2023-52425.patch b/expat-2.2.5-CVE-2023-52425.patch new file mode 100644 index 0000000..564b01f --- /dev/null +++ b/expat-2.2.5-CVE-2023-52425.patch @@ -0,0 +1,1393 @@ +commit d9904191c90476ed039ce9d33aee9ef56c807f8e +Author: Tomas Korbar +Date: Mon Mar 25 14:25:24 2024 +0100 + + Fix CVE-2023-52425 + + upstream PR #789 + +diff --git a/expat/Makefile.am b/expat/Makefile.am +index 5ed9ac4..0e3181d 100644 +--- a/expat/Makefile.am ++++ b/expat/Makefile.am +@@ -120,6 +120,11 @@ buildlib: + run-benchmark: + $(MAKE) -C tests/benchmark + ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3 ++ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_attr.xml 4096 3 ++ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_cdata.xml 4096 3 ++ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_comment.xml 4096 3 ++ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_tag.xml 4096 3 ++ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_text.xml 4096 3 + + tests/xmlts.zip: + if test "$(XMLTS_ZIP)" = ""; then \ +diff --git a/expat/doc/reference.html b/expat/doc/reference.html +index efc19f4..95c33c7 100644 +--- a/expat/doc/reference.html ++++ b/expat/doc/reference.html +@@ -1996,6 +1996,27 @@ parse position may be before the beginning of the buffer.

+ return NULL.

+ + ++

XML_SetReparseDeferralEnabled

++
++/* Added in Expat 2.6.0. */
++XML_Bool XMLCALL
++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
++
++
++

++ A large token may require many parse calls before enough data is available for Expat to parse it in full. ++ If Expat retried parsing the token on every parse call, parsing could take quadratic time. ++ To avoid this, Expat only retries once a significant amount of new data is available. ++ This function allows disabling this behavior. ++

++

++ The enabled argument should be XML_TRUE or XML_FALSE. ++

++

++ Returns XML_TRUE on success, and XML_FALSE on error. ++

++
++ +

Miscellaneous functions

+ +

The functions in this section either obtain state information from +diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml +index 5e2a4ae..6b719eb 100644 +--- a/expat/doc/xmlwf.xml ++++ b/expat/doc/xmlwf.xml +@@ -246,6 +246,16 @@ supports both. + + + ++ ++ ++ ++ ++ Disable reparse deferral, and allow quadratic parse runtime ++ on large tokens (default: reparse deferral enabled). ++ ++ ++ ++ + + + +diff --git a/expat/lib/.gitignore b/expat/lib/.gitignore +index 9c9cf88..cd5b24f 100644 +--- a/expat/lib/.gitignore ++++ b/expat/lib/.gitignore +@@ -1,7 +1,6 @@ + Makefile + .libs + *.lo +-expat.h + Debug + Debug-w + Release +diff --git a/expat/lib/expat.h b/expat/lib/expat.h +index 1f608c0..afe12c5 100644 +--- a/expat/lib/expat.h ++++ b/expat/lib/expat.h +@@ -1071,6 +1071,10 @@ XMLPARSEAPI(const XML_Feature *) + XML_GetFeatureList(void); + + ++/* Added in Expat 2.6.0. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++ + /* Expat follows the semantic versioning convention. + See http://semver.org. 
+ */ +diff --git a/expat/lib/internal.h b/expat/lib/internal.h +index e33fdcb..78b5bc1 100644 +--- a/expat/lib/internal.h ++++ b/expat/lib/internal.h +@@ -109,6 +109,7 @@ + # endif + #endif + ++#include "expat.h" + + #ifdef __cplusplus + extern "C" { +@@ -119,6 +120,9 @@ void + _INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef); + + ++extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c ++extern unsigned int g_parseAttempts; // used for testing only ++ + #ifdef __cplusplus + } + #endif +diff --git a/expat/lib/libexpat.def b/expat/lib/libexpat.def +index d08f5b7..163870b 100644 +--- a/expat/lib/libexpat.def ++++ b/expat/lib/libexpat.def +@@ -75,4 +75,5 @@ EXPORTS + ; XML_GetAttributeInfo @66 + XML_SetHashSalt @67@ + ; added with version 2.2.5 +- _INTERNAL_trim_to_complete_utf8_characters @68@ +\ No newline at end of file ++ _INTERNAL_trim_to_complete_utf8_characters @68@ ++ XML_SetReparseDeferralEnabled @69 +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index 3f765f7..488f63f 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -34,6 +34,7 @@ + # define _GNU_SOURCE 1 /* syscall prototype */ + #endif + ++#include + #include + #include /* memset(), memcpy() */ + #include +@@ -173,6 +174,8 @@ typedef char ICHAR; + #endif /* HAVE_BCOPY */ + #endif /* HAVE_MEMMOVE */ + ++#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) ++ + #include "internal.h" + #include "xmltok.h" + #include "xmlrole.h" +@@ -544,6 +547,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName); + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) + ++XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c ++unsigned int g_parseAttempts = 0; // used for testing only ++ + struct XML_ParserStruct { + /* The first member must be m_userData so that the XML_GetUserData + macro works. 
*/ +@@ -559,6 +565,9 @@ struct XML_ParserStruct { + const char *m_bufferLim; + XML_Index m_parseEndByteIndex; + const char *m_parseEndPtr; ++ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ ++ XML_Bool m_reparseDeferralEnabled; ++ int m_lastBufferRequestSize; + XML_Char *m_dataBuf; + XML_Char *m_dataBufEnd; + XML_StartElementHandler m_startElementHandler; +@@ -892,6 +901,48 @@ get_hash_secret_salt(XML_Parser parser) { + return parser->m_hash_secret_salt; + } + ++static enum XML_Error ++callProcessor(XML_Parser parser, const char *start, const char *end, ++ const char **endPtr) { ++ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); ++ ++ if (parser->m_reparseDeferralEnabled ++ && ! parser->m_parsingStatus.finalBuffer) { ++ // Heuristic: don't try to parse a partial token again until the amount of ++ // available data has increased significantly. ++ const size_t had_before = parser->m_partialTokenBytesBefore; ++ // ...but *do* try anyway if we're close to causing a reallocation. ++ size_t available_buffer ++ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); ++#ifdef XML_CONTEXT_BYTES ++ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); ++#endif ++ available_buffer ++ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); ++ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok ++ const bool enough ++ = (have_now >= 2 * had_before) ++ || ((size_t)parser->m_lastBufferRequestSize > available_buffer); ++ ++ if (! enough) { ++ *endPtr = start; // callers may expect this to be set ++ return XML_ERROR_NONE; ++ } ++ } ++ g_parseAttempts += 1; ++ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); ++ if (ret == XML_ERROR_NONE) { ++ // if we consumed nothing, remember what we had on this parse attempt. 
++ if (*endPtr == start) { ++ parser->m_partialTokenBytesBefore = have_now; ++ } else { ++ parser->m_partialTokenBytesBefore = 0; ++ } ++ } ++ return ret; ++} ++ ++ + static XML_Bool /* only valid for root parser */ + startParsing(XML_Parser parser) + { +@@ -1078,6 +1129,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; ++ parser->m_partialTokenBytesBefore = 0; ++ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; ++ parser->m_lastBufferRequestSize = 0; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; +@@ -1239,6 +1293,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, + to worry which hash secrets each table has. + */ + unsigned long oldhash_secret_salt; ++ XML_Bool oldReparseDeferralEnabled; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) +@@ -1283,6 +1338,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, + to worry which hash secrets each table has. 
+ */ + oldhash_secret_salt = parser->m_hash_secret_salt; ++ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; + + #ifdef XML_DTD + if (!context) +@@ -1336,6 +1392,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; ++ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; + parser->m_parentParser = oldParser; + #ifdef XML_DTD + parser->m_paramEntityParsing = oldParamEntityParsing; +@@ -1833,52 +1890,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) + parser->m_parsingStatus.parsing = XML_PARSING; + } + +- if (len == 0) { +- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; +- if (!isFinal) +- return XML_STATUS_OK; +- parser->m_positionPtr = parser->m_bufferPtr; +- parser->m_parseEndPtr = parser->m_bufferEnd; +- +- /* If data are left over from last buffer, and we now know that these +- data are the final chunk of input, then we have to check them again +- to detect errors based on that fact. +- */ +- parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); +- +- if (parser->m_errorCode == XML_ERROR_NONE) { +- switch (parser->m_parsingStatus.parsing) { +- case XML_SUSPENDED: +- /* It is hard to be certain, but it seems that this case +- * cannot occur. This code is cleaning up a previous parse +- * with no new data (since len == 0). Changing the parsing +- * state requires getting to execute a handler function, and +- * there doesn't seem to be an opportunity for that while in +- * this circumstance. +- * +- * Given the uncertainty, we retain the code but exclude it +- * from coverage tests. 
+- * +- * LCOV_EXCL_START +- */ +- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); +- parser->m_positionPtr = parser->m_bufferPtr; +- return XML_STATUS_SUSPENDED; +- /* LCOV_EXCL_STOP */ +- case XML_INITIALIZED: +- case XML_PARSING: +- parser->m_parsingStatus.parsing = XML_FINISHED; +- /* fall through */ +- default: +- return XML_STATUS_OK; +- } +- } +- parser->m_eventEndPtr = parser->m_eventPtr; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } + #ifndef XML_CONTEXT_BYTES +- else if (parser->m_bufferPtr == parser->m_bufferEnd) { ++ if (parser->m_bufferPtr == parser->m_bufferEnd) { + const char *end; + int nLeftOver; + enum XML_Status result; +@@ -1893,7 +1906,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + +- parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); ++ parser->m_errorCode = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -1920,22 +1933,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position); + nLeftOver = s + len - end; + if (nLeftOver) { +- if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) { +- /* avoid _signed_ integer overflow */ +- char *temp = NULL; +- const int bytesToAllocate = (int)((unsigned)len * 2U); +- if (bytesToAllocate > 0) { +- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); +- } +- if (temp == NULL) { +- parser->m_errorCode = XML_ERROR_NO_MEMORY; +- parser->m_eventPtr = parser->m_eventEndPtr = NULL; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } +- parser->m_buffer = temp; +- parser->m_bufferLim = 
parser->m_buffer + bytesToAllocate; ++ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED ++ // (and XML_ERROR_FINISHED) from XML_GetBuffer. ++ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; ++ parser->m_parsingStatus.parsing = XML_PARSING; ++ void *const temp = XML_GetBuffer(parser, nLeftOver); ++ parser->m_parsingStatus.parsing = originalStatus; ++ // GetBuffer may have overwritten this, but we want to remember what the ++ // app requested, not how many bytes were left over after parsing. ++ parser->m_lastBufferRequestSize = len; ++ if (temp == NULL) { ++ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). ++ parser->m_eventPtr = parser->m_eventEndPtr = NULL; ++ parser->m_processor = errorProcessor; ++ return XML_STATUS_ERROR; + } ++ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we ++ // don't have any data to preserve, and can copy straight into the start ++ // of the buffer rather than the GetBuffer return pointer (which may be ++ // pointing further into the allocated buffer). 
+ memcpy(parser->m_buffer, end, nLeftOver); + } + parser->m_bufferPtr = parser->m_buffer; +@@ -1947,15 +1963,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) + return result; + } + #endif /* not defined XML_CONTEXT_BYTES */ +- else { +- void *buff = XML_GetBuffer(parser, len); +- if (buff == NULL) +- return XML_STATUS_ERROR; +- else { +- memcpy(buff, s, len); +- return XML_ParseBuffer(parser, len, isFinal); +- } ++ void *buff = XML_GetBuffer(parser, len); ++ if (buff == NULL) ++ return XML_STATUS_ERROR; ++ if (len > 0) { ++ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above ++ memcpy(buff, s, len); + } ++ return XML_ParseBuffer(parser, len, isFinal); + } + + enum XML_Status XMLCALL +@@ -1989,7 +2004,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + +- parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); ++ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, ++ &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2035,8 +2051,14 @@ XML_GetBuffer(XML_Parser parser, int len) + default: ; + } + +- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { ++ // whether or not the request succeeds, `len` seems to be the app's preferred ++ // buffer fill size; remember it. 
++ parser->m_lastBufferRequestSize = len; ++ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) ++ || parser->m_buffer == NULL) { ++#ifdef XML_CONTEXT_BYTES + int keep; ++#endif /* XML_CONTEXT_BYTES > 0 */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( +@@ -2055,8 +2077,9 @@ XML_GetBuffer(XML_Parser parser, int len) + return NULL; + } + neededSize += keep; +- if (neededSize +- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { ++ if (parser->m_buffer && parser->m_bufferPtr ++ && neededSize ++ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { + #ifdef XML_CONTEXT_BYTES + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset +@@ -2070,19 +2093,17 @@ XML_GetBuffer(XML_Parser parser, int len) + parser->m_bufferPtr -= offset; + } + #else +- if (parser->m_buffer && parser->m_bufferPtr) { +- memmove(parser->m_buffer, parser->m_bufferPtr, +- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); +- parser->m_bufferEnd +- = parser->m_buffer +- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); +- parser->m_bufferPtr = parser->m_buffer; +- } +-#endif /* not defined XML_CONTEXT_BYTES */ ++ memmove(parser->m_buffer, parser->m_bufferPtr, ++ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); ++ parser->m_bufferEnd ++ = parser->m_buffer ++ + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); ++ parser->m_bufferPtr = parser->m_buffer; ++#endif /* XML_CONTEXT_BYTES > 0 */ + } else { + char *newBuf; + int bufferSize +- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); ++ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); + if (bufferSize == 0) + bufferSize = INIT_BUFFER_SIZE; + do { +@@ -2099,7 +2120,7 @@ XML_GetBuffer(XML_Parser parser, int len) + return NULL; + } + parser->m_bufferLim = newBuf + bufferSize; +-#ifdef XML_CONTEXT_BYTES ++#if 
XML_CONTEXT_BYTES > 0 + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) +@@ -2182,7 +2203,7 @@ XML_ResumeParser(XML_Parser parser) + } + parser->m_parsingStatus.parsing = XML_PARSING; + +- parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); ++ parser->m_errorCode = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2504,6 +2525,15 @@ XML_GetFeatureList(void) + return features; + } + ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { ++ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { ++ parser->m_reparseDeferralEnabled = enabled; ++ return XML_TRUE; ++ } ++ return XML_FALSE; ++} ++ + /* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more +@@ -4440,16 +4470,17 @@ entityValueInitProcessor(XML_Parser parser, + parser->m_processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } +- /* If we are at the end of the buffer, this would cause XmlPrologTok to +- return XML_TOK_NONE on the next call, which would then cause the +- function to exit with *nextPtr set to s - that is what we want for other +- tokens, but not for the BOM - we would rather like to skip it; +- then, when this routine is entered the next time, XmlPrologTok will +- return XML_TOK_INVALID, since the BOM is still in the buffer +- */ +- else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) { ++ /* XmlPrologTok has now set the encoding based on the BOM it found, and we ++ must move s and nextPtr forward to consume the BOM. 
++ ++ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we ++ would leave the BOM in the buffer and return. On the next call to this ++ function, our XmlPrologTok call would return XML_TOK_INVALID, since it ++ is not valid to have multiple BOMs. ++ */ ++ else if (tok == XML_TOK_BOM) { + *nextPtr = next; +- return XML_ERROR_NONE; ++ s = next; + } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with +diff --git a/expat/tests/minicheck.c b/expat/tests/minicheck.c +index be1e37e..6c694a0 100644 +--- a/expat/tests/minicheck.c ++++ b/expat/tests/minicheck.c +@@ -209,6 +209,21 @@ srunner_run_all(SRunner *runner, int verbosity) + } + } + ++void ++_fail(const char *file, int line, const char *msg) { ++ /* Always print the error message so it isn't lost. In this case, ++ we have a failure, so there's no reason to be quiet about what ++ it is. ++ */ ++ _check_current_filename = file; ++ _check_current_lineno = line; ++ if (msg != NULL) { ++ const int has_newline = (msg[strlen(msg) - 1] == '\n'); ++ fprintf(stderr, "ERROR: %s%s", msg, has_newline ? "" : "\n"); ++ } ++ longjmp(env, 1); ++} ++ + void + _fail_unless(int UNUSED_P(condition), const char *UNUSED_P(file), int UNUSED_P(line), const char *msg) + { +diff --git a/expat/tests/minicheck.h b/expat/tests/minicheck.h +index a2f57dd..894895a 100644 +--- a/expat/tests/minicheck.h ++++ b/expat/tests/minicheck.h +@@ -60,7 +60,13 @@ extern "C" { + { + #define END_TEST } } + +-#define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg) ++# define fail(msg) _fail(__FILE__, __LINE__, msg) ++# define assert_true(cond) \ ++ do { \ ++ if (! (cond)) { \ ++ _fail(__FILE__, __LINE__, "check failed: " #cond); \ ++ } \ ++ } while (0) + + typedef void (*tcase_setup_function)(void); + typedef void (*tcase_teardown_function)(void); +@@ -101,6 +107,11 @@ void _check_set_test_info(char const *function, + * Prototypes for the actual implementation. 
+ */ + ++# if defined(__GNUC__) ++__attribute__((noreturn)) ++# endif ++void ++_fail(const char *file, int line, const char *msg); + void _fail_unless(int condition, const char *file, int line, const char *msg); + Suite *suite_create(const char *name); + TCase *tcase_create(const char *name); +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index f58f794..486073f 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -46,6 +46,7 @@ + #include /* ptrdiff_t */ + #include + #include ++#include + + #if ! defined(__cplusplus) + # if defined(_MSC_VER) && (_MSC_VER <= 1700) +@@ -1112,7 +1113,7 @@ START_TEST(test_column_number_after_parse) + const char *text = ""; + XML_Size colno; + +- if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR) ++ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + colno = XML_GetCurrentColumnNumber(parser); + if (colno != 11) { +@@ -2769,7 +2770,7 @@ START_TEST(test_default_current) + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); ++ CharData_CheckXMLChars(&storage, XCS("DCDCDCDD")); + + /* Again, without the defaulting */ + XML_ParserReset(parser, NULL); +@@ -2780,7 +2781,7 @@ START_TEST(test_default_current) + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +- CharData_CheckXMLChars(&storage, XCS("DcccccD")); ++ CharData_CheckXMLChars(&storage, XCS("DcccD")); + + /* Now with an internal entity to complicate matters */ + XML_ParserReset(parser, NULL); +@@ -4216,6 +4217,19 @@ END_TEST + #endif // defined(XML_CONTEXT_BYTES) + + ++START_TEST(test_getbuffer_allocates_on_zero_len) { ++ for (int first_len = 1; first_len >= 0; first_len--) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ 
assert_true(XML_GetBuffer(parser, first_len) != NULL); ++ assert_true(XML_GetBuffer(parser, 0) != NULL); ++ if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) ++ xml_failure(parser); ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ + /* Test position information macros */ + START_TEST(test_byte_info_at_end) + { +@@ -6747,6 +6761,12 @@ START_TEST(test_utf8_in_start_tags) { + char doc[1024]; + size_t failCount = 0; + ++ // we need all the bytes to be parsed, but we don't want the errors that can ++ // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. ++ if (g_reparseDeferralEnabledDefault) { ++ return; ++ } ++ + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { +@@ -7352,6 +7372,609 @@ START_TEST(test_empty_element_abort) + } + END_TEST + ++/* Regression test for quadratic parsing on large tokens */ ++START_TEST(test_big_tokens_take_linear_time) { ++ const char *const too_slow_failure_message ++ = "Compared to the baseline runtime of the first test, this test has a " ++ "slowdown of more than . " ++ "Please keep increasing the value by 1 until it reliably passes the " ++ "test on your hardware and open a bug sharing that number with us. " ++ "Thanks in advance!"; ++ const struct { ++ const char *pre; ++ const char *post; ++ } text[] = { ++ {"", ""}, // assumed good, used as baseline ++ {""}, // CDATA, performed OK before patch ++ {""}, // big attribute, used to be O(N²) ++ {""}, // long comment, used to be O(N²) ++ {"<", "/>"}, // big elem name, used to be O(N²) ++ }; ++ const int num_cases = sizeof(text) / sizeof(text[0]); ++ // For the test we need a value that is: ++ // (1) big enough that the test passes reliably (avoiding flaky tests), and ++ // (2) small enough that the test actually catches regressions. 
++ const int max_slowdown = 15; ++ char aaaaaa[4096]; ++ const int fillsize = (int)sizeof(aaaaaa); ++ const int fillcount = 100; ++ ++ memset(aaaaaa, 'a', fillsize); ++ ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // heuristic is disabled; we would get O(n^2) and fail. ++ } ++#if defined(_WIN32) ++ if (CLOCKS_PER_SEC < 100000) { ++ // Skip this test if clock() doesn't have reasonably good resolution. ++ // This workaround is only applied to Windows targets, since XSI requires ++ // the value to be 1 000 000 (10x the condition here), and we want to be ++ // very sure that at least one platform in CI can catch regressions. ++ return; ++ } ++#endif ++ ++ clock_t baseline = 0; ++ for (int i = 0; i < num_cases; ++i) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ enum XML_Status status; ++ const clock_t start = clock(); ++ ++ // parse the start text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, ++ (int)strlen(text[i].pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // parse lots of 'a', failing the test early if it takes too long ++ for (int f = 0; f < fillcount; ++f) { ++ status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // i == 0 means we're still calculating the baseline value ++ if (i > 0) { ++ const clock_t now = clock(); ++ const clock_t clocks_so_far = now - start; ++ const int slowdown = clocks_so_far / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf( ++ stderr, ++ "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ } ++ } ++ // parse the end text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, ++ (int)strlen(text[i].post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ // how long did it take in total? 
++ const clock_t end = clock(); ++ const clock_t taken = end - start; ++ if (i == 0) { ++ assert_true(taken > 0); // just to make sure we don't div-by-0 later ++ baseline = taken; ++ } ++ const int slowdown = taken / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ (int)taken, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral) { ++ const char *const pre = ""; ++ const char *const start = ""; ++ char eeeeee[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ memset(eeeeee, 'e', fillsize); ++ ++ for (int enabled = 0; enabled <= 1; enabled += 1) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // ..and the start of the token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one ++ 
++ // end the token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. ++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); // the should be done ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++struct element_decl_data { ++ XML_Parser parser; ++ int count; ++}; ++ ++static void ++element_decl_counter(void *userData, const XML_Char *UNUSED_P(name), XML_Content *model) { ++ struct element_decl_data *testdata = (struct element_decl_data *)userData; ++ testdata->count += 1; ++ XML_FreeContentModel(testdata->parser, model); ++} ++ ++static int ++external_inherited_parser(XML_Parser p, const XML_Char *context, ++ const XML_Char *UNUSED_P(base), const XML_Char *UNUSED_P(systemId), ++ const XML_Char *UNUSED_P(publicId)) { ++ const char *const pre = "\n"; ++ const char *const start = "\n"; ++ const char *const post = "\n"; ++ const int enabled = *(int *)XML_GetUserData(p); ++ char eeeeee[100]; ++ char spaces[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ assert_true(fillsize == (int)sizeof(spaces)); ++ memset(eeeeee, 'e', fillsize); ++ memset(spaces, ' ', fillsize); ++ ++ XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); ++ assert_true(parser != NULL); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ struct element_decl_data testdata; ++ testdata.parser = parser; ++ testdata.count = 0; ++ 
XML_SetUserData(parser, &testdata); ++ XML_SetElementDeclHandler(parser, element_decl_counter); ++ ++ enum XML_Status status; ++ // parse the initial text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // first element should be done ++ ++ // ..and the start of the big token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ assert_true(testdata.count == 1); // *still* just the first one ++ ++ // end the big token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ assert_true(testdata.count == 1); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. 
++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, spaces, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ assert_true(testdata.count == 2); // the big token should be done ++ ++ // parse the final text ++ status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done ++ ++ XML_ParserFree(parser); ++ return XML_STATUS_OK; ++} ++ ++START_TEST(test_reparse_deferral_is_inherited) { ++ const char *const text ++ = ""; ++ for (int enabled = 0; enabled <= 1; ++enabled) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ XML_SetUserData(parser, (void *)&enabled); ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ // this handler creates a sub-parser and checks that its deferral behavior ++ // is what we expected, based on the value of `enabled` (in userdata). 
++ XML_SetExternalEntityRefHandler(parser, external_inherited_parser); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) ++ xml_failure(parser); ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_null_parser) { ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) ++ == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) ++ == XML_FALSE); ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_the_fly) { ++ const char *const pre = ""; ++ char iiiiii[100]; ++ const int fillsize = (int)sizeof(iiiiii); ++ memset(iiiiii, 'i', fillsize); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // try to parse some 'i', but the token isn't finished ++ status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one ++ ++ // end the token. 
++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // not yet. ++ ++ // now change the heuristic setting and add *no* data ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); ++ // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. ++ status = XML_Parse(parser, "", 0, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_set_bad_reparse_option) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++static size_t g_totalAlloc = 0; ++static size_t g_biggestAlloc = 0; ++ ++static void * ++counting_realloc(void *ptr, size_t size) { ++ g_totalAlloc += size; ++ if (size > g_biggestAlloc) { ++ g_biggestAlloc = size; ++ } ++ return realloc(ptr, size); ++} ++ ++static void * ++counting_malloc(size_t size) { ++ return counting_realloc(NULL, size); ++} ++ ++START_TEST(test_bypass_heuristic_when_close_to_bufsize) { ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // this test is irrelevant when the deferral heuristic is disabled. 
++ } ++ ++ const int document_length = 65536; ++ char *const document = (char *)malloc(document_length); ++ ++ const XML_Memory_Handling_Suite memfuncs = { ++ counting_malloc, ++ counting_realloc, ++ free, ++ }; ++ ++ const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; ++ const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; ++ const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; ++ ++ for (const int *leading = leading_list; *leading >= 0; leading++) { ++ for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { ++ for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { ++ // start by checking that the test looks reasonably valid ++ assert_true(*leading + *bigtoken <= document_length); ++ ++ // put 'x' everywhere; some will be overwritten by elements. ++ memset(document, 'x', document_length); ++ // maybe add an initial tag ++ if (*leading) { ++ assert_true(*leading >= 3); // or the test case is invalid ++ memcpy(document, "", 3); ++ } ++ // add the large token ++ document[*leading + 0] = '<'; ++ document[*leading + 1] = 'b'; ++ memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token ++ document[*leading + *bigtoken - 1] = '>'; ++ ++ // 1 for 'b', plus 1 or 0 depending on the presence of 'a' ++ const int expected_elem_total = 1 + (*leading ? 
1 : 0); ++ ++ XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); ++ assert_true(parser != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ g_biggestAlloc = 0; ++ g_totalAlloc = 0; ++ int offset = 0; ++ // fill data until the big token is covered (but not necessarily parsed) ++ while (offset < *leading + *bigtoken) { ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ } ++ // Now, check that we've had a buffer allocation that could fit the ++ // context bytes and our big token. In order to detect a special case, ++ // we need to know how many bytes of our big token were included in the ++ // first push that contained _any_ bytes of the big token: ++ const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); ++ if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { ++ // Special case: we aren't saving any context, and the whole big token ++ // was covered by a single fill, so Expat may have parsed directly ++ // from our input pointer, without allocating an internal buffer. 
++ } else if (*leading < XML_CONTEXT_BYTES) { ++ assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); ++ } else { ++ assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); ++ } ++ // fill data until the big token is actually parsed ++ while (storage.count < expected_elem_total) { ++ const size_t alloc_before = g_totalAlloc; ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ // since all the bytes of the big token are already in the buffer, ++ // the bufsize ceiling should make us finish its parsing without any ++ // further buffer allocations. We assume that there will be no other ++ // large allocations in this test. ++ assert_true(g_totalAlloc - alloc_before < 4096); ++ } ++ // test-the-test: was our alloc even called? ++ assert_true(g_totalAlloc > 0); ++ // test-the-test: there shouldn't be any extra start elements ++ assert_true(storage.count == expected_elem_total); ++ ++ XML_ParserFree(parser); ++ } ++ } ++ } ++ free(document); ++} ++END_TEST ++ ++START_TEST(test_varying_buffer_fills) { ++ const int KiB = 1024; ++ const int MiB = 1024 * KiB; ++ const int document_length = 16 * MiB; ++ const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB ++ ++ char *const document = (char *)malloc(document_length); ++ assert_true(document != NULL); ++ memset(document, 'x', document_length); ++ document[0] = '<'; ++ document[1] = 't'; ++ memset(&document[2], ' ', big - 2); // a very spacy token ++ document[big - 1] = '>'; ++ ++ // Each testcase is a list of buffer fill sizes, terminated by a value < 0. ++ // When reparse deferral is enabled, the final (negated) value is the expected ++ // maximum number of bytes scanned in parse attempts. 
++ const int testcases[][30] = { ++ {8 * MiB, -8 * MiB}, ++ {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total ++ // zero-size fills shouldn't trigger the bypass ++ {4 * MiB, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, ++ // try to hit the buffer ceiling only once (at the end) ++ {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, ++ // try to hit the same buffer ceiling multiple times ++ {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, ++ ++ // try to hit every ceiling, by always landing 1K shy of the buffer size ++ {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, ++ 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, ++ ++ // try to avoid every ceiling, by always landing 1B past the buffer size ++ // the normal 2x heuristic threshold still forces parse attempts. ++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 ++ -(10 * MiB + 682 * KiB + 7)}, ++ // try to avoid every ceiling again, except on our last fill. 
++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 ++ -(10 * MiB + 682 * KiB + 6)}, ++ ++ // try to hit ceilings on the way multiple times ++ {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer ++ 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer ++ 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer ++ 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer ++ // we'll make a parse attempt at every parse call ++ -(45 * MiB + 12)}, ++ }; ++ const int testcount = sizeof(testcases) / sizeof(testcases[0]); ++ for (int test_i = 0; test_i < testcount; test_i++) { ++ const int *fillsize = testcases[test_i]; ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ g_parseAttempts = 0; ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) ++ int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) ++ int offset = 0; ++ while (*fillsize >= 0) { ++ assert_true(offset + *fillsize <= document_length); // or test is invalid ++ const unsigned attempts_before = g_parseAttempts; ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ fillsize++; ++ assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow ++ worstcase_bytes += offset; // we might've tried to 
parse all pending bytes ++ if (g_parseAttempts != attempts_before) { ++ assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse ++ assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow ++ scanned_bytes += offset; // we *did* try to parse all pending bytes ++ } ++ } ++ assert_true(storage.count == 1); // the big token should've been parsed ++ assert_true(scanned_bytes > 0); // test-the-test: does our counter work? ++ if (g_reparseDeferralEnabledDefault) { ++ // heuristic is enabled; some XML_Parse calls may have deferred reparsing ++ const int max_bytes_scanned = -*fillsize; ++ if (scanned_bytes > max_bytes_scanned) { ++ fprintf(stderr, ++ "bytes scanned in parse attempts: actual=%d limit=%d \n", ++ scanned_bytes, max_bytes_scanned); ++ fail("too many bytes scanned in parse attempts"); ++ } ++ assert_true(scanned_bytes <= worstcase_bytes); ++ } else { ++ // heuristic is disabled; every XML_Parse() will have reparsed ++ assert_true(scanned_bytes == worstcase_bytes); ++ } ++ ++ XML_ParserFree(parser); ++ } ++ free(document); ++} ++END_TEST ++ ++ + /* + * Namespaces tests. 
+ */ +@@ -7435,13 +8058,13 @@ START_TEST(test_return_ns_triplet) + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); +- if (!triplet_start_flag) +- fail("triplet_start_checker not invoked"); + /* Check that unsetting "return triplets" fails while still parsing */ + XML_SetReturnNSTriplet(parser, XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); ++ if (!triplet_start_flag) ++ fail("triplet_start_checker not invoked"); + if (!triplet_end_flag) + fail("triplet_end_checker not invoked"); + if (dummy_handler_flags != (DUMMY_START_NS_DECL_HANDLER_FLAG | +@@ -12476,6 +13099,7 @@ make_suite(void) + #if defined(XML_CONTEXT_BYTES) + tcase_add_test(tc_basic, test_get_buffer_3_overflow); + #endif ++ tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); + tcase_add_test(tc_basic, test_byte_info_at_end); + tcase_add_test(tc_basic, test_byte_info_at_error); + tcase_add_test(tc_basic, test_byte_info_at_cdata); +@@ -12588,6 +13212,14 @@ make_suite(void) + tcase_add_test(tc_basic, test_bad_notation); + tcase_add_test(tc_basic, test_default_doctype_handler); + tcase_add_test(tc_basic, test_empty_element_abort); ++ tcase_add_test(tc_basic, test_big_tokens_take_linear_time); ++ tcase_add_test(tc_basic, test_set_reparse_deferral); ++ tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); ++ tcase_add_test(tc_basic, test_set_bad_reparse_option); ++ tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); ++ tcase_add_test(tc_basic, test_varying_buffer_fills); + + suite_add_tcase(s, tc_namespace); + tcase_add_checked_fixture(tc_namespace, +diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c +index 82d028e..cd26919 100644 +--- a/expat/xmlwf/xmlwf.c ++++ b/expat/xmlwf/xmlwf.c +@@ 
-35,6 +35,7 @@ + #include + #include + #include ++#include + + #include "expat.h" + #include "codepage.h" +@@ -892,7 +893,7 @@ static void + usage(const XML_Char *prog, int rc) + { + ftprintf(stderr, +- T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [-N] [file ...]\n"), prog); ++ T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [-N] [-q] [file ...]\n"), prog); + exit(rc); + } + +@@ -917,6 +918,8 @@ tmain(int argc, XML_Char **argv) + XML_PARAM_ENTITY_PARSING_NEVER; + int useStdin = 0; + XmlwfUserData userData = { NULL, NULL, NULL }; ++ XML_Bool disableDeferral = XML_FALSE; ++ + + #ifdef _MSC_VER + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); +@@ -1003,6 +1006,11 @@ tmain(int argc, XML_Char **argv) + case T('v'): + showVersion(argv[0]); + return 0; ++ case T('q'): { ++ disableDeferral = XML_TRUE; ++ j++; ++ break; ++ } + case T('\0'): + if (j > 1) { + i++; +@@ -1033,6 +1041,16 @@ tmain(int argc, XML_Char **argv) + exit(1); + } + ++ if (disableDeferral) { ++ const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE); ++ if (! success) { ++ // This prevents tperror(..) 
from reporting misleading "[..]: Success" ++ errno = EINVAL; ++ tperror(T("Failed to disable reparse deferral")); ++ exit(1); ++ } ++ } ++ + if (requireStandalone) + XML_SetNotStandaloneHandler(parser, notStandalone); + XML_SetParamEntityParsing(parser, paramEntityParsing); +diff --git a/testdata/largefiles/aaaaaa_attr.xml b/testdata/largefiles/aaaaaa_attr.xml +new file mode 100644 +index 0000000..66e3d25 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_attr.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_cdata.xml b/testdata/largefiles/aaaaaa_cdata.xml +new file mode 100644 +index 0000000..66f64bd +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_cdata.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_comment.xml b/testdata/largefiles/aaaaaa_comment.xml +new file mode 100644 +index 0000000..bb9af13 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_comment.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_tag.xml b/testdata/largefiles/aaaaaa_tag.xml +new file mode 100644 +index 0000000..946f701 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_tag.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_text.xml b/testdata/largefiles/aaaaaa_text.xml +new file mode 100644 +index 0000000..e266acb +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_text.xml +@@ -0,0 +1 @@ ++ACHARS +\ No newline at end of file diff --git a/expat.spec b/expat.spec index 5ceff69..293d4a2 100644 --- a/expat.spec +++ b/expat.spec @@ -3,7 +3,7 @@ Summary: An XML parser library Name: expat Version: %(echo %{unversion} | sed 's/_/./g') -Release: 11%{?dist} +Release: 12%{?dist} Source: https://github.com/libexpat/libexpat/archive/R_%{unversion}.tar.gz#/expat-%{version}.tar.gz URL: https://libexpat.github.io/ License: MIT @@ -22,6 +22,7 @@ Patch10: expat-2.2.5-Prevent-integer-overflow-in-copyString.patch Patch11: 
expat-2.2.5-Prevent-stack-exhaustion-in-build_model.patch Patch12: expat-2.2.5-Ensure-raw-tagnames-are-safe-exiting-internalEntityParser.patch Patch13: expat-2.2.5-CVE-2022-43680.patch +Patch14: expat-2.2.5-CVE-2023-52425.patch %description This is expat, the C library for parsing XML, written by James Clark. Expat @@ -63,6 +64,9 @@ Install it if you need to link statically with expat. %patch11 -p1 -b .CVE-2022-25313 %patch12 -p1 -b .CVE-2022-40674 %patch13 -p1 -b .CVE-2022-43680 +pushd .. +%patch14 -p1 -b .CVE-2023-52425 +popd sed -i 's/install-data-hook/do-nothing-please/' lib/Makefile.am ./buildconf.sh @@ -79,6 +83,15 @@ make install DESTDIR=$RPM_BUILD_ROOT rm -f $RPM_BUILD_ROOT%{_libdir}/*.la %check +bash -c "for i in {1..500000}; do printf AAAAAAAAAAAAAAAAAAAA >> achars.txt; done" +for testfile in ../testdata/largefiles/aaaaaa_*; do + first_part="$(sed 's/\(.*\)ACHARS.*/\1/g' $testfile)" + second_part="$(sed 's/.*ACHARS\(.*\)/\1/g' $testfile)" + printf "$first_part" > "$testfile" + cat achars.txt >> "$testfile" + printf "$second_part" >> "$testfile" +done + make check %ldconfig_scriptlets @@ -101,6 +114,10 @@ make check %{_libdir}/lib*.a %changelog +* Thu Mar 21 2024 Tomas Korbar - 2.2.5-12 +- CVE-2023-52425 expat: parsing large tokens can trigger a denial of service +- Resolves: RHEL-29321 + * Mon Nov 14 2022 Tomas Korbar - 2.2.5-11 - CVE-2022-43680 expat: use-after free caused by overeager destruction of a shared DTD in XML_ExternalEntityParserCreate - Resolves: CVE-2022-43680