diff --git a/.gitignore b/.gitignore
index d4d1ce1..f153597 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
SOURCES/expat-2.2.4.tar.bz2
/expat-2.2.4.tar.bz2
+/expat-2.5.0.tar.gz
diff --git a/expat-2.2.5-CVE-2018-20843.patch b/expat-2.2.5-CVE-2018-20843.patch
deleted file mode 100644
index 8afbfd0..0000000
--- a/expat-2.2.5-CVE-2018-20843.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-
-https://bugzilla.redhat.com/show_bug.cgi?id=CVE-2018-20843
-https://github.com/libexpat/libexpat/commit/11f8838bf99ea0a6f0b76f9760c43704d00c4ff6
-
---- libexpat-R_2_2_5/expat/lib/xmlparse.c.cve20843
-+++ libexpat-R_2_2_5/expat/lib/xmlparse.c
-@@ -6057,7 +6057,7 @@ setElementTypePrefix(XML_Parser parser,
- else
- poolDiscard(&dtd->pool);
- elementType->prefix = prefix;
--
-+ break;
- }
- }
- return 1;
diff --git a/expat-2.5.0-CVE-2023-52425.patch b/expat-2.5.0-CVE-2023-52425.patch
new file mode 100644
index 0000000..f561eaf
--- /dev/null
+++ b/expat-2.5.0-CVE-2023-52425.patch
@@ -0,0 +1,1466 @@
+commit 678a2f7efcaaa977886e055613f2332615aef82c
+Author: Tomas Korbar
The functions in this section configure the built-in +- protection against various forms of +- billion laughs attacks.
+++@@ -2188,6 +2185,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, + + + +++XML_SetReparseDeferralEnabled
++++/* Added in Expat 2.6.0. */ ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++++++++ +++ Large tokens may require many parse calls before enough data is available for Expat to parse it in full. ++ If Expat retried parsing the token on every parse call, parsing could take quadratic time. ++ To avoid this, Expat only retries once a significant amount of new data is available. ++ This function allows disabling this behavior. ++
++++ The
++enabledargument should beXML_TRUEorXML_FALSE. ++++ Returns
++XML_TRUEon success, andXML_FALSEon error. ++Miscellaneous functions
+ +The functions in this section either obtain state information from +diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml +index 9603abf..3d35393 100644 +--- a/expat/doc/xmlwf.xml ++++ b/expat/doc/xmlwf.xml +@@ -313,6 +313,16 @@ supports both. + + + ++
++ ++ +++ ++ ++++ Disable reparse deferral, and allow quadratic parse runtime ++ on large tokens (default: reparse deferral enabled). ++ +++ + +diff --git a/expat/lib/expat.h b/expat/lib/expat.h +index 1c83563..842dd70 100644 +--- a/expat/lib/expat.h ++++ b/expat/lib/expat.h +@@ -16,6 +16,7 @@ + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer ++ Copyright (c) 2023 Sony Corporation / Snild Dolkow + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining +@@ -1050,6 +1051,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); + #endif + ++/* Added in Expat 2.6.0. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++ + /* Expat follows the semantic versioning convention. + See http://semver.org. + */ +diff --git a/expat/lib/internal.h b/expat/lib/internal.h +index e09f533..e2709c8 100644 +--- a/expat/lib/internal.h ++++ b/expat/lib/internal.h +@@ -31,6 +31,7 @@ + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo ++ Copyright (c) 2023 Sony Corporation / Snild Dolkow + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining +@@ -160,6 +161,9 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); + const char *unsignedCharToPrintable(unsigned char c); + #endif + ++extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c ++extern unsigned int g_parseAttempts; // used for testing only ++ + #ifdef __cplusplus + } + #endif +diff --git a/expat/lib/libexpat.def.cmake b/expat/lib/libexpat.def.cmake +index cf434a2..3ff4d55 100644 +--- a/expat/lib/libexpat.def.cmake ++++ b/expat/lib/libexpat.def.cmake +@@ -77,3 +77,4 @@ EXPORTS + ; added with version 2.4.0 + @_EXPAT_COMMENT_DTD@ 
XML_SetBillionLaughsAttackProtectionActivationThreshold @69 + @_EXPAT_COMMENT_DTD@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 ++XML_SetReparseDeferralEnabled @71 +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index b6c2eca..2ae64e9 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -73,6 +73,7 @@ + # endif + #endif + ++#include + #include + #include /* memset(), memcpy() */ + #include +@@ -196,6 +197,8 @@ typedef char ICHAR; + /* Do safe (NULL-aware) pointer arithmetic */ + #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) + ++#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) ++ + #include "internal.h" + #include "xmltok.h" + #include "xmlrole.h" +@@ -602,6 +605,9 @@ static unsigned long getDebugLevel(const char *variableName, + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) + ++XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c ++unsigned int g_parseAttempts = 0; // used for testing only ++ + struct XML_ParserStruct { + /* The first member must be m_userData so that the XML_GetUserData + macro works. */ +@@ -617,6 +623,9 @@ struct XML_ParserStruct { + const char *m_bufferLim; + XML_Index m_parseEndByteIndex; + const char *m_parseEndPtr; ++ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ ++ XML_Bool m_reparseDeferralEnabled; ++ int m_lastBufferRequestSize; + XML_Char *m_dataBuf; + XML_Char *m_dataBufEnd; + XML_StartElementHandler m_startElementHandler; +@@ -948,6 +957,47 @@ get_hash_secret_salt(XML_Parser parser) { + return parser->m_hash_secret_salt; + } + ++static enum XML_Error ++callProcessor(XML_Parser parser, const char *start, const char *end, ++ const char **endPtr) { ++ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); ++ ++ if (parser->m_reparseDeferralEnabled ++ && ! parser->m_parsingStatus.finalBuffer) { ++ // Heuristic: don't try to parse a partial token again until the amount of ++ // available data has increased significantly. 
++ const size_t had_before = parser->m_partialTokenBytesBefore; ++ // ...but *do* try anyway if we're close to causing a reallocation. ++ size_t available_buffer ++ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); ++#if XML_CONTEXT_BYTES > 0 ++ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); ++#endif ++ available_buffer ++ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); ++ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok ++ const bool enough ++ = (have_now >= 2 * had_before) ++ || ((size_t)parser->m_lastBufferRequestSize > available_buffer); ++ ++ if (! enough) { ++ *endPtr = start; // callers may expect this to be set ++ return XML_ERROR_NONE; ++ } ++ } ++ g_parseAttempts += 1; ++ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); ++ if (ret == XML_ERROR_NONE) { ++ // if we consumed nothing, remember what we had on this parse attempt. ++ if (*endPtr == start) { ++ parser->m_partialTokenBytesBefore = have_now; ++ } else { ++ parser->m_partialTokenBytesBefore = 0; ++ } ++ } ++ return ret; ++} ++ + static XML_Bool /* only valid for root parser */ + startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ +@@ -1129,6 +1179,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; ++ parser->m_partialTokenBytesBefore = 0; ++ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; ++ parser->m_lastBufferRequestSize = 0; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; +@@ -1298,6 +1351,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + to worry which hash secrets each table has. 
+ */ + unsigned long oldhash_secret_salt; ++ XML_Bool oldReparseDeferralEnabled; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) +@@ -1342,6 +1396,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + to worry which hash secrets each table has. + */ + oldhash_secret_salt = parser->m_hash_secret_salt; ++ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; + + #ifdef XML_DTD + if (! context) +@@ -1394,6 +1449,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; ++ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; + parser->m_parentParser = oldParser; + #ifdef XML_DTD + parser->m_paramEntityParsing = oldParamEntityParsing; +@@ -1848,55 +1904,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + parser->m_parsingStatus.parsing = XML_PARSING; + } + +- if (len == 0) { +- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; +- if (! isFinal) +- return XML_STATUS_OK; +- parser->m_positionPtr = parser->m_bufferPtr; +- parser->m_parseEndPtr = parser->m_bufferEnd; +- +- /* If data are left over from last buffer, and we now know that these +- data are the final chunk of input, then we have to check them again +- to detect errors based on that fact. +- */ +- parser->m_errorCode +- = parser->m_processor(parser, parser->m_bufferPtr, +- parser->m_parseEndPtr, &parser->m_bufferPtr); +- +- if (parser->m_errorCode == XML_ERROR_NONE) { +- switch (parser->m_parsingStatus.parsing) { +- case XML_SUSPENDED: +- /* It is hard to be certain, but it seems that this case +- * cannot occur. This code is cleaning up a previous parse +- * with no new data (since len == 0). 
Changing the parsing +- * state requires getting to execute a handler function, and +- * there doesn't seem to be an opportunity for that while in +- * this circumstance. +- * +- * Given the uncertainty, we retain the code but exclude it +- * from coverage tests. +- * +- * LCOV_EXCL_START +- */ +- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, +- parser->m_bufferPtr, &parser->m_position); +- parser->m_positionPtr = parser->m_bufferPtr; +- return XML_STATUS_SUSPENDED; +- /* LCOV_EXCL_STOP */ +- case XML_INITIALIZED: +- case XML_PARSING: +- parser->m_parsingStatus.parsing = XML_FINISHED; +- /* fall through */ +- default: +- return XML_STATUS_OK; +- } +- } +- parser->m_eventEndPtr = parser->m_eventPtr; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } + #ifndef XML_CONTEXT_BYTES +- else if (parser->m_bufferPtr == parser->m_bufferEnd) { ++ if (parser->m_bufferPtr == parser->m_bufferEnd) { + const char *end; + int nLeftOver; + enum XML_Status result; +@@ -1907,12 +1916,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } ++ // though this isn't a buffer request, we assume that `len` is the app's ++ // preferred buffer fill size, and therefore save it here. 
++ parser->m_lastBufferRequestSize = len; + parser->m_parseEndByteIndex += len; + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + + parser->m_errorCode +- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); ++ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -1939,23 +1951,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + &parser->m_position); + nLeftOver = s + len - end; + if (nLeftOver) { +- if (parser->m_buffer == NULL +- || nLeftOver > parser->m_bufferLim - parser->m_buffer) { +- /* avoid _signed_ integer overflow */ +- char *temp = NULL; +- const int bytesToAllocate = (int)((unsigned)len * 2U); +- if (bytesToAllocate > 0) { +- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); +- } +- if (temp == NULL) { +- parser->m_errorCode = XML_ERROR_NO_MEMORY; +- parser->m_eventPtr = parser->m_eventEndPtr = NULL; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } +- parser->m_buffer = temp; +- parser->m_bufferLim = parser->m_buffer + bytesToAllocate; ++ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED ++ // (and XML_ERROR_FINISHED) from XML_GetBuffer. ++ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; ++ parser->m_parsingStatus.parsing = XML_PARSING; ++ void *const temp = XML_GetBuffer(parser, nLeftOver); ++ parser->m_parsingStatus.parsing = originalStatus; ++ // GetBuffer may have overwritten this, but we want to remember what the ++ // app requested, not how many bytes were left over after parsing. ++ parser->m_lastBufferRequestSize = len; ++ if (temp == NULL) { ++ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
++ parser->m_eventPtr = parser->m_eventEndPtr = NULL; ++ parser->m_processor = errorProcessor; ++ return XML_STATUS_ERROR; + } ++ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we ++ // don't have any data to preserve, and can copy straight into the start ++ // of the buffer rather than the GetBuffer return pointer (which may be ++ // pointing further into the allocated buffer). + memcpy(parser->m_buffer, end, nLeftOver); + } + parser->m_bufferPtr = parser->m_buffer; +@@ -1967,15 +1981,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + return result; + } + #endif /* not defined XML_CONTEXT_BYTES */ +- else { +- void *buff = XML_GetBuffer(parser, len); +- if (buff == NULL) +- return XML_STATUS_ERROR; +- else { +- memcpy(buff, s, len); +- return XML_ParseBuffer(parser, len, isFinal); +- } ++ void *buff = XML_GetBuffer(parser, len); ++ if (buff == NULL) ++ return XML_STATUS_ERROR; ++ if (len > 0) { ++ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above ++ memcpy(buff, s, len); + } ++ return XML_ParseBuffer(parser, len, isFinal); + } + + enum XML_Status XMLCALL +@@ -2015,8 +2028,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + +- parser->m_errorCode = parser->m_processor( +- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); ++ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, ++ &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2061,10 +2074,14 @@ XML_GetBuffer(XML_Parser parser, int len) { + default:; + } + +- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { +-#ifdef XML_CONTEXT_BYTES ++ // whether or not the request succeeds, `len` seems to be the app's preferred ++ // buffer fill size; remember it. 
++ parser->m_lastBufferRequestSize = len; ++ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) ++ || parser->m_buffer == NULL) { ++#if XML_CONTEXT_BYTES > 0 + int keep; +-#endif /* defined XML_CONTEXT_BYTES */ ++#endif /* XML_CONTEXT_BYTES > 0 */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( +@@ -2073,7 +2090,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +-#ifdef XML_CONTEXT_BYTES ++#if XML_CONTEXT_BYTES > 0 + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; +@@ -2083,10 +2100,11 @@ XML_GetBuffer(XML_Parser parser, int len) { + return NULL; + } + neededSize += keep; +-#endif /* defined XML_CONTEXT_BYTES */ +- if (neededSize +- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { +-#ifdef XML_CONTEXT_BYTES ++#endif /* XML_CONTEXT_BYTES > 0 */ ++ if (parser->m_buffer && parser->m_bufferPtr ++ && neededSize ++ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { ++#if XML_CONTEXT_BYTES > 0 + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) +@@ -2099,19 +2117,17 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_bufferPtr -= offset; + } + #else +- if (parser->m_buffer && parser->m_bufferPtr) { +- memmove(parser->m_buffer, parser->m_bufferPtr, +- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); +- parser->m_bufferEnd +- = parser->m_buffer +- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); +- parser->m_bufferPtr = parser->m_buffer; +- } +-#endif /* not defined XML_CONTEXT_BYTES */ ++ memmove(parser->m_buffer, parser->m_bufferPtr, ++ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); ++ parser->m_bufferEnd ++ = parser->m_buffer ++ + 
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); ++ parser->m_bufferPtr = parser->m_buffer; ++#endif /* XML_CONTEXT_BYTES > 0 */ + } else { + char *newBuf; + int bufferSize +- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); ++ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); + if (bufferSize == 0) + bufferSize = INIT_BUFFER_SIZE; + do { +@@ -2128,7 +2144,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + return NULL; + } + parser->m_bufferLim = newBuf + bufferSize; +-#ifdef XML_CONTEXT_BYTES ++#if XML_CONTEXT_BYTES > 0 + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) +@@ -2158,7 +2174,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_bufferEnd = newBuf; + } + parser->m_bufferPtr = parser->m_buffer = newBuf; +-#endif /* not defined XML_CONTEXT_BYTES */ ++#endif /* XML_CONTEXT_BYTES > 0 */ + } + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; +@@ -2208,7 +2224,7 @@ XML_ResumeParser(XML_Parser parser) { + } + parser->m_parsingStatus.parsing = XML_PARSING; + +- parser->m_errorCode = parser->m_processor( ++ parser->m_errorCode = callProcessor( + parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { +@@ -2561,6 +2577,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + } + #endif /* XML_DTD */ + ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { ++ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { ++ parser->m_reparseDeferralEnabled = enabled; ++ return XML_TRUE; ++ } ++ return XML_FALSE; ++} ++ + /* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more +@@ -4482,15 +4507,15 @@ 
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + parser->m_processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } +- /* If we are at the end of the buffer, this would cause XmlPrologTok to +- return XML_TOK_NONE on the next call, which would then cause the +- function to exit with *nextPtr set to s - that is what we want for other +- tokens, but not for the BOM - we would rather like to skip it; +- then, when this routine is entered the next time, XmlPrologTok will +- return XML_TOK_INVALID, since the BOM is still in the buffer ++ /* XmlPrologTok has now set the encoding based on the BOM it found, and we ++ must move s and nextPtr forward to consume the BOM. ++ ++ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we ++ would leave the BOM in the buffer and return. On the next call to this ++ function, our XmlPrologTok call would return XML_TOK_INVALID, since it ++ is not valid to have multiple BOMs. + */ +- else if (tok == XML_TOK_BOM && next == end +- && ! parser->m_parsingStatus.finalBuffer) { ++ else if (tok == XML_TOK_BOM) { + # ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { +@@ -4500,7 +4525,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + # endif + + *nextPtr = next; +- return XML_ERROR_NONE; ++ s = next; + } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with +diff --git a/expat/tests/minicheck.c b/expat/tests/minicheck.c +index 1c65748..f383380 100644 +--- a/expat/tests/minicheck.c ++++ b/expat/tests/minicheck.c +@@ -208,6 +208,21 @@ srunner_run_all(SRunner *runner, int verbosity) { + } + } + ++void ++_fail(const char *file, int line, const char *msg) { ++ /* Always print the error message so it isn't lost. In this case, ++ we have a failure, so there's no reason to be quiet about what ++ it is. 
++ */ ++ _check_current_filename = file; ++ _check_current_lineno = line; ++ if (msg != NULL) { ++ const int has_newline = (msg[strlen(msg) - 1] == '\n'); ++ fprintf(stderr, "ERROR: %s%s", msg, has_newline ? "" : "\n"); ++ } ++ longjmp(env, 1); ++} ++ + void + _fail_unless(int condition, const char *file, int line, const char *msg) { + /* Always print the error message so it isn't lost. In this case, +diff --git a/expat/tests/minicheck.h b/expat/tests/minicheck.h +index cc1f835..032b54e 100644 +--- a/expat/tests/minicheck.h ++++ b/expat/tests/minicheck.h +@@ -64,7 +64,14 @@ extern "C" { + } \ + } + +-#define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg) ++ ++# define fail(msg) _fail(__FILE__, __LINE__, msg) ++# define assert_true(cond) \ ++ do { \ ++ if (! (cond)) { \ ++ _fail(__FILE__, __LINE__, "check failed: " #cond); \ ++ } \ ++ } while (0) + + typedef void (*tcase_setup_function)(void); + typedef void (*tcase_teardown_function)(void); +@@ -103,6 +110,11 @@ void _check_set_test_info(char const *function, char const *filename, + * Prototypes for the actual implementation. + */ + ++# if defined(__GNUC__) ++__attribute__((noreturn)) ++# endif ++void ++_fail(const char *file, int line, const char *msg); + void _fail_unless(int condition, const char *file, int line, const char *msg); + Suite *suite_create(const char *name); + TCase *tcase_create(const char *name); +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index 915fa52..941f61d 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -54,6 +54,7 @@ + #include + #include + #include /* intptr_t uint64_t */ ++#include + + #if ! 
defined(__cplusplus) + # include +@@ -1071,7 +1072,7 @@ START_TEST(test_column_number_after_parse) { + const char *text = " "; + XML_Size colno; + +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + colno = XML_GetCurrentColumnNumber(g_parser); +@@ -2582,7 +2583,7 @@ START_TEST(test_default_current) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); ++ CharData_CheckXMLChars(&storage, XCS("DCDCDCDD")); + + /* Again, without the defaulting */ + XML_ParserReset(g_parser, NULL); +@@ -2593,7 +2594,7 @@ START_TEST(test_default_current) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DcccccD")); ++ CharData_CheckXMLChars(&storage, XCS("DcccD")); + + /* Now with an internal entity to complicate matters */ + XML_ParserReset(g_parser, NULL); +@@ -3946,6 +3947,19 @@ START_TEST(test_get_buffer_3_overflow) { + END_TEST + #endif // defined(XML_CONTEXT_BYTES) + ++START_TEST(test_getbuffer_allocates_on_zero_len) { ++ for (int first_len = 1; first_len >= 0; first_len--) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_GetBuffer(parser, first_len) != NULL); ++ assert_true(XML_GetBuffer(parser, 0) != NULL); ++ if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) ++ xml_failure(parser); ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ + /* Test position information macros */ + START_TEST(test_byte_info_at_end) { + const char *text = " "; +@@ -6205,6 +6219,12 @@ START_TEST(test_utf8_in_start_tags) { + char doc[1024]; + size_t failCount = 0; + ++ // we need all the bytes to be parsed, but we don't want the errors that can ++ // 
trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. ++ if (g_reparseDeferralEnabledDefault) { ++ return; ++ } ++ + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { +@@ -6830,6 +6850,613 @@ START_TEST(test_nested_entity_suspend) { + } + END_TEST + ++/* Regression test for quadratic parsing on large tokens */ ++START_TEST(test_big_tokens_take_linear_time) { ++ const char *const too_slow_failure_message ++ = "Compared to the baseline runtime of the first test, this test has a " ++ "slowdown of more than . " ++ "Please keep increasing the value by 1 until it reliably passes the " ++ "test on your hardware and open a bug sharing that number with us. " ++ "Thanks in advance!"; ++ const struct { ++ const char *pre; ++ const char *post; ++ } text[] = { ++ {"", ""}, // assumed good, used as baseline ++ {""}, // CDATA, performed OK before patch ++ {" "}, // big attribute, used to be O(N²) ++ {" "}, // long comment, used to be O(N²) ++ {" <", "/> "}, // big elem name, used to be O(N²) ++ }; ++ const int num_cases = sizeof(text) / sizeof(text[0]); ++ // For the test we need avalue that is: ++ // (1) big enough that the test passes reliably (avoiding flaky tests), and ++ // (2) small enough that the test actually catches regressions. ++ const int max_slowdown = 15; ++ char aaaaaa[4096]; ++ const int fillsize = (int)sizeof(aaaaaa); ++ const int fillcount = 100; ++ ++ memset(aaaaaa, 'a', fillsize); ++ ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // heuristic is disabled; we would get O(n^2) and fail. ++ } ++#if defined(_WIN32) ++ if (CLOCKS_PER_SEC < 100000) { ++ // Skip this test if clock() doesn't have reasonably good resolution. ++ // This workaround is only applied to Windows targets, since XSI requires ++ // the value to be 1 000 000 (10x the condition here), and we want to be ++ // very sure that at least one platform in CI can catch regressions. 
++ return; ++ } ++#endif ++ ++ clock_t baseline = 0; ++ for (int i = 0; i < num_cases; ++i) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ enum XML_Status status; ++ const clock_t start = clock(); ++ ++ // parse the start text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, ++ (int)strlen(text[i].pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // parse lots of 'a', failing the test early if it takes too long ++ for (int f = 0; f < fillcount; ++f) { ++ status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // i == 0 means we're still calculating the baseline value ++ if (i > 0) { ++ const clock_t now = clock(); ++ const clock_t clocks_so_far = now - start; ++ const int slowdown = clocks_so_far / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf( ++ stderr, ++ "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ } ++ } ++ // parse the end text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, ++ (int)strlen(text[i].post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ // how long did it take in total? 
++ const clock_t end = clock(); ++ const clock_t taken = end - start; ++ if (i == 0) { ++ assert_true(taken > 0); // just to make sure we don't div-by-0 later ++ baseline = taken; ++ } ++ const int slowdown = taken / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ (int)taken, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral) { ++ const char *const pre = " "; ++ const char *const start = " "; ++ char eeeeee[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ memset(eeeeee, 'e', fillsize); ++ ++ for (int enabled = 0; enabled <= 1; enabled += 1) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // ..and the start of the token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 
++ ++ // end the token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. ++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); // the should be done ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++struct element_decl_data { ++ XML_Parser parser; ++ int count; ++}; ++ ++static void ++element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { ++ UNUSED_P(name); ++ struct element_decl_data *testdata = (struct element_decl_data *)userData; ++ testdata->count += 1; ++ XML_FreeContentModel(testdata->parser, model); ++} ++ ++static int ++external_inherited_parser(XML_Parser p, const XML_Char *context, ++ const XML_Char *base, const XML_Char *systemId, ++ const XML_Char *publicId) { ++ UNUSED_P(base); ++ UNUSED_P(systemId); ++ UNUSED_P(publicId); ++ const char *const pre = "\n"; ++ const char *const start = "\n"; ++ const char *const post = "\n"; ++ const int enabled = *(int *)XML_GetUserData(p); ++ char eeeeee[100]; ++ char spaces[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ assert_true(fillsize == (int)sizeof(spaces)); ++ memset(eeeeee, 'e', fillsize); ++ memset(spaces, ' ', fillsize); ++ ++ XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); ++ assert_true(parser != NULL); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ struct element_decl_data testdata; ++ testdata.parser = 
parser; ++ testdata.count = 0; ++ XML_SetUserData(parser, &testdata); ++ XML_SetElementDeclHandler(parser, element_decl_counter); ++ ++ enum XML_Status status; ++ // parse the initial text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // first element should be done ++ ++ // ..and the start of the big token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ assert_true(testdata.count == 1); // *still* just the first one ++ ++ // end the big token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ assert_true(testdata.count == 1); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. 
++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, spaces, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ assert_true(testdata.count == 2); // the big token should be done ++ ++ // parse the final text ++ status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done ++ ++ XML_ParserFree(parser); ++ return XML_STATUS_OK; ++} ++ ++START_TEST(test_reparse_deferral_is_inherited) { ++ const char *const text ++ = " "; ++ for (int enabled = 0; enabled <= 1; ++enabled) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ XML_SetUserData(parser, (void *)&enabled); ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ // this handler creates a sub-parser and checks that its deferral behavior ++ // is what we expected, based on the value of `enabled` (in userdata). 
++ XML_SetExternalEntityRefHandler(parser, external_inherited_parser); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) ++ xml_failure(parser); ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_null_parser) { ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) ++ == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) ++ == XML_FALSE); ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_the_fly) { ++ const char *const pre = " "; ++ char iiiiii[100]; ++ const int fillsize = (int)sizeof(iiiiii); ++ memset(iiiiii, 'i', fillsize); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // try to parse some 'i', but the token isn't finished ++ status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one ++ ++ // end the token. 
++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // not yet. ++ ++ // now change the heuristic setting and add *no* data ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); ++ // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. ++ status = XML_Parse(parser, "", 0, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_set_bad_reparse_option) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++static size_t g_totalAlloc = 0; ++static size_t g_biggestAlloc = 0; ++ ++static void * ++counting_realloc(void *ptr, size_t size) { ++ g_totalAlloc += size; ++ if (size > g_biggestAlloc) { ++ g_biggestAlloc = size; ++ } ++ return realloc(ptr, size); ++} ++ ++static void * ++counting_malloc(size_t size) { ++ return counting_realloc(NULL, size); ++} ++ ++START_TEST(test_bypass_heuristic_when_close_to_bufsize) { ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // this test is irrelevant when the deferral heuristic is disabled. 
++ } ++ ++ const int document_length = 65536; ++ char *const document = (char *)malloc(document_length); ++ ++ const XML_Memory_Handling_Suite memfuncs = { ++ counting_malloc, ++ counting_realloc, ++ free, ++ }; ++ ++ const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; ++ const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; ++ const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; ++ ++ for (const int *leading = leading_list; *leading >= 0; leading++) { ++ for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { ++ for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { ++ // start by checking that the test looks reasonably valid ++ assert_true(*leading + *bigtoken <= document_length); ++ ++ // put 'x' everywhere; some will be overwritten by elements. ++ memset(document, 'x', document_length); ++ // maybe add an initial tag ++ if (*leading) { ++ assert_true(*leading >= 3); // or the test case is invalid ++ memcpy(document, "", 3); ++ } ++ // add the large token ++ document[*leading + 0] = '<'; ++ document[*leading + 1] = 'b'; ++ memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token ++ document[*leading + *bigtoken - 1] = '>'; ++ ++ // 1 for 'b', plus 1 or 0 depending on the presence of 'a' ++ const int expected_elem_total = 1 + (*leading ? 
1 : 0); ++ ++ XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); ++ assert_true(parser != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ g_biggestAlloc = 0; ++ g_totalAlloc = 0; ++ int offset = 0; ++ // fill data until the big token is covered (but not necessarily parsed) ++ while (offset < *leading + *bigtoken) { ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ } ++ // Now, check that we've had a buffer allocation that could fit the ++ // context bytes and our big token. In order to detect a special case, ++ // we need to know how many bytes of our big token were included in the ++ // first push that contained _any_ bytes of the big token: ++ const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); ++ if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { ++ // Special case: we aren't saving any context, and the whole big token ++ // was covered by a single fill, so Expat may have parsed directly ++ // from our input pointer, without allocating an internal buffer. 
++ } else if (*leading < XML_CONTEXT_BYTES) { ++ assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); ++ } else { ++ assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); ++ } ++ // fill data until the big token is actually parsed ++ while (storage.count < expected_elem_total) { ++ const size_t alloc_before = g_totalAlloc; ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ // since all the bytes of the big token are already in the buffer, ++ // the bufsize ceiling should make us finish its parsing without any ++ // further buffer allocations. We assume that there will be no other ++ // large allocations in this test. ++ assert_true(g_totalAlloc - alloc_before < 4096); ++ } ++ // test-the-test: was our alloc even called? ++ assert_true(g_totalAlloc > 0); ++ // test-the-test: there shouldn't be any extra start elements ++ assert_true(storage.count == expected_elem_total); ++ ++ XML_ParserFree(parser); ++ } ++ } ++ } ++ free(document); ++} ++END_TEST ++ ++START_TEST(test_varying_buffer_fills) { ++ const int KiB = 1024; ++ const int MiB = 1024 * KiB; ++ const int document_length = 16 * MiB; ++ const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB ++ ++ char *const document = (char *)malloc(document_length); ++ assert_true(document != NULL); ++ memset(document, 'x', document_length); ++ document[0] = '<'; ++ document[1] = 't'; ++ memset(&document[2], ' ', big - 2); // a very spacy token ++ document[big - 1] = '>'; ++ ++ // Each testcase is a list of buffer fill sizes, terminated by a value < 0. ++ // When reparse deferral is enabled, the final (negated) value is the expected ++ // maximum number of bytes scanned in parse attempts. 
++ const int testcases[][30] = { ++ {8 * MiB, -8 * MiB}, ++ {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total ++ // zero-size fills shouldn't trigger the bypass ++ {4 * MiB, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, ++ // try to hit the buffer ceiling only once (at the end) ++ {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, ++ // try to hit the same buffer ceiling multiple times ++ {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, ++ ++ // try to hit every ceiling, by always landing 1K shy of the buffer size ++ {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, ++ 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, ++ ++ // try to avoid every ceiling, by always landing 1B past the buffer size ++ // the normal 2x heuristic threshold still forces parse attempts. ++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 ++ -(10 * MiB + 682 * KiB + 7)}, ++ // try to avoid every ceiling again, except on our last fill. 
++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 ++ -(10 * MiB + 682 * KiB + 6)}, ++ ++ // try to hit ceilings on the way multiple times ++ {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer ++ 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer ++ 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer ++ 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer ++ // we'll make a parse attempt at every parse call ++ -(45 * MiB + 12)}, ++ }; ++ const int testcount = sizeof(testcases) / sizeof(testcases[0]); ++ for (int test_i = 0; test_i < testcount; test_i++) { ++ const int *fillsize = testcases[test_i]; ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ g_parseAttempts = 0; ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) ++ int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) ++ int offset = 0; ++ while (*fillsize >= 0) { ++ assert_true(offset + *fillsize <= document_length); // or test is invalid ++ const unsigned attempts_before = g_parseAttempts; ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ fillsize++; ++ assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow ++ worstcase_bytes += offset; // we might've tried to 
parse all pending bytes ++ if (g_parseAttempts != attempts_before) { ++ assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse ++ assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow ++ scanned_bytes += offset; // we *did* try to parse all pending bytes ++ } ++ } ++ assert_true(storage.count == 1); // the big token should've been parsed ++ assert_true(scanned_bytes > 0); // test-the-test: does our counter work? ++ if (g_reparseDeferralEnabledDefault) { ++ // heuristic is enabled; some XML_Parse calls may have deferred reparsing ++ const int max_bytes_scanned = -*fillsize; ++ if (scanned_bytes > max_bytes_scanned) { ++ fprintf(stderr, ++ "bytes scanned in parse attempts: actual=%d limit=%d \n", ++ scanned_bytes, max_bytes_scanned); ++ fail("too many bytes scanned in parse attempts"); ++ } ++ assert_true(scanned_bytes <= worstcase_bytes); ++ } else { ++ // heuristic is disabled; every XML_Parse() will have reparsed ++ assert_true(scanned_bytes == worstcase_bytes); ++ } ++ ++ XML_ParserFree(parser); ++ } ++ free(document); ++} ++END_TEST ++ ++ + /* + * Namespaces tests. + */ +@@ -6902,13 +7529,13 @@ START_TEST(test_return_ns_triplet) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- if (! triplet_start_flag) +- fail("triplet_start_checker not invoked"); + /* Check that unsetting "return triplets" fails while still parsing */ + XML_SetReturnNSTriplet(g_parser, XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); ++ if (! triplet_start_flag) ++ fail("triplet_start_checker not invoked"); + if (! 
triplet_end_flag) + fail("triplet_end_checker not invoked"); + if (dummy_handler_flags +@@ -12219,6 +12846,7 @@ make_suite(void) { + #if defined(XML_CONTEXT_BYTES) + tcase_add_test(tc_basic, test_get_buffer_3_overflow); + #endif ++ tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); + tcase_add_test(tc_basic, test_byte_info_at_end); + tcase_add_test(tc_basic, test_byte_info_at_error); + tcase_add_test(tc_basic, test_byte_info_at_cdata); +@@ -12337,7 +12965,14 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_pool_integrity_with_unfinished_attr); + tcase_add_test(tc_basic, test_nested_entity_suspend); +- ++ tcase_add_test(tc_basic, test_big_tokens_take_linear_time); ++ tcase_add_test(tc_basic, test_set_reparse_deferral); ++ tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); ++ tcase_add_test(tc_basic, test_set_bad_reparse_option); ++ tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); ++ tcase_add_test(tc_basic, test_varying_buffer_fills); + suite_add_tcase(s, tc_namespace); + tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown); + tcase_add_test(tc_namespace, test_return_ns_triplet); +diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c +index 471f2a2..7c62919 100644 +--- a/expat/xmlwf/xmlwf.c ++++ b/expat/xmlwf/xmlwf.c +@@ -914,6 +914,9 @@ usage(const XML_Char *prog, int rc) { + T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") + T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") + T("\n") ++ T("reparse deferral:\n") ++ T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n") ++ T("\n") + T("info arguments:\n") + T(" -h show this [h]elp message and exit\n") + T(" -v show program's [v]ersion number and exit\n") +@@ -967,6 +970,8 @@ tmain(int argc, 
XML_Char **argv) { + unsigned long long attackThresholdBytes; + XML_Bool attackThresholdGiven = XML_FALSE; + ++ XML_Bool disableDeferral = XML_FALSE; ++ + int exitCode = XMLWF_EXIT_SUCCESS; + enum XML_ParamEntityParsing paramEntityParsing + = XML_PARAM_ENTITY_PARSING_NEVER; +@@ -1089,6 +1094,11 @@ tmain(int argc, XML_Char **argv) { + #endif + break; + } ++ case T('q'): { ++ disableDeferral = XML_TRUE; ++ j++; ++ break; ++ } + case T('\0'): + if (j > 1) { + i++; +@@ -1134,6 +1144,16 @@ tmain(int argc, XML_Char **argv) { + #endif + } + ++ if (disableDeferral) { ++ const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE); ++ if (! success) { ++ // This prevents tperror(..) from reporting misleading "[..]: Success" ++ errno = EINVAL; ++ tperror(T("Failed to disable reparse deferral")); ++ exit(XMLWF_EXIT_INTERNAL_ERROR); ++ } ++ } ++ + if (requireStandalone) + XML_SetNotStandaloneHandler(parser, notStandalone); + XML_SetParamEntityParsing(parser, paramEntityParsing); +diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py +index c2a527f..1bd0a0a 100755 +--- a/expat/xmlwf/xmlwf_helpgen.py ++++ b/expat/xmlwf/xmlwf_helpgen.py +@@ -81,6 +81,10 @@ billion_laughs.add_argument('-a', metavar='FACTOR', + help='set maximum tolerated [a]mplification factor (default: 100.0)') + billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)') + ++reparse_deferral = parser.add_argument_group('reparse deferral') ++reparse_deferral.add_argument('-q', metavar='FACTOR', ++ help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens') ++ + parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)') + + info = parser.add_argument_group('info arguments') +diff --git a/testdata/largefiles/aaaaaa_attr.xml b/testdata/largefiles/aaaaaa_attr.xml +new file mode 100644 +index 0000000..66e3d25 +--- /dev/null ++++ 
b/testdata/largefiles/aaaaaa_attr.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_cdata.xml b/testdata/largefiles/aaaaaa_cdata.xml +new file mode 100644 +index 0000000..66f64bd +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_cdata.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_comment.xml b/testdata/largefiles/aaaaaa_comment.xml +new file mode 100644 +index 0000000..bb9af13 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_comment.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_tag.xml b/testdata/largefiles/aaaaaa_tag.xml +new file mode 100644 +index 0000000..946f701 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_tag.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_text.xml b/testdata/largefiles/aaaaaa_text.xml +new file mode 100644 +index 0000000..e266acb +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_text.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file diff --git a/expat-2.5.0-CVE-2024-28757.patch b/expat-2.5.0-CVE-2024-28757.patch new file mode 100644 index 0000000..0ac0f9e --- /dev/null +++ b/expat-2.5.0-CVE-2024-28757.patch @@ -0,0 +1,172 @@ +commit cd3b344e0dbd19a812d0b4f34f9d089ed7c5c411 +Author: Tomas Korbar ACHARS +Date: Tue Mar 19 15:12:18 2024 +0100 + + Fix CVE-2024-28757 + + Upstream PRs #841 and #842 + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index 2ae64e9..0896b16 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -6164,7 +6164,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, + dtd->keepProcessing = dtd->standalone; + goto endEntityValue; + } +- if (entity->open) { ++ if (entity->open || (entity == parser->m_declEntity)) { + if (enc == parser->m_encoding) + parser->m_eventPtr = entityTextPtr; + result = XML_ERROR_RECURSIVE_ENTITY_REF; +@@ -7680,6 +7680,8 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite 
*memsuite) { + + static float + accountingGetCurrentAmplification(XML_Parser rootParser) { ++ // 1.........1.........12 => 22 ++ const size_t lenOfShortestInclude = sizeof("") - 1; + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; +@@ -7687,7 +7689,9 @@ accountingGetCurrentAmplification(XML_Parser rootParser) { + = rootParser->m_accounting.countBytesDirect + ? (countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) +- : 1.0f; ++ : ((lenOfShortestInclude ++ + rootParser->m_accounting.countBytesIndirect) ++ / (float)lenOfShortestInclude); + assert(! rootParser->m_parentParser); + return amplificationFactor; + } +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index 941f61d..93adc45 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -1788,6 +1788,48 @@ START_TEST(test_wfc_no_recursive_entity_refs) { + } + END_TEST + ++START_TEST(test_recursive_external_parameter_entity_2) { ++ struct TestCase { ++ const char *doc; ++ enum XML_Status expectedStatus; ++ }; ++ ++ struct TestCase cases[] = { ++ {"", XML_STATUS_ERROR}, ++ {"" ++ "", ++ XML_STATUS_ERROR}, ++ {"" ++ "", ++ XML_STATUS_OK}, ++ {"", XML_STATUS_OK}, ++ }; ++ ++ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ const char *const doc = cases[i].doc; ++ const enum XML_Status expectedStatus = cases[i].expectedStatus; ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ ++ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); ++ assert_true(ext_parser != NULL); ++ ++ const enum XML_Status actualStatus ++ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); ++ ++ assert_true(actualStatus == expectedStatus); ++ if (actualStatus != XML_STATUS_OK) { ++ assert_true(XML_GetErrorCode(ext_parser) ++ == XML_ERROR_RECURSIVE_ENTITY_REF); ++ } ++ ++ XML_ParserFree(ext_parser); ++ XML_ParserFree(parser); ++ } 
++} ++END_TEST ++ + /* Test incomplete external entities are faulted */ + START_TEST(test_ext_entity_invalid_parse) { + const char *text = "" ++ // (22) that is used in function accountingGetCurrentAmplification in ++ // xmlparse.c. ++ // 1.........1.........1.........1.........1..4 => 44 ++ const char doc[] = ""; ++ const int docLen = (int)sizeof(doc) - 1; ++ const float maximumToleratedAmplification = 2.0f; ++ ++ struct TestCase { ++ int offsetOfThreshold; ++ enum XML_Status expectedStatus; ++ }; ++ ++ struct TestCase cases[] = { ++ {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR}, ++ {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK}, ++ }; ++ ++ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ const int offsetOfThreshold = cases[i].offsetOfThreshold; ++ const enum XML_Status expectedStatus = cases[i].expectedStatus; ++ const unsigned long long activationThresholdBytes ++ = docLen + offsetOfThreshold; ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ ++ assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parser, maximumToleratedAmplification) ++ == XML_TRUE); ++ assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold( ++ parser, activationThresholdBytes) ++ == XML_TRUE); ++ ++ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); ++ assert_true(ext_parser != NULL); ++ ++ const enum XML_Status actualStatus ++ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE); ++ ++ assert_true(actualStatus == expectedStatus); ++ if (actualStatus != XML_STATUS_OK) { ++ assert_true(XML_GetErrorCode(ext_parser) ++ == XML_ERROR_AMPLIFICATION_LIMIT_BREACH); ++ } ++ ++ XML_ParserFree(ext_parser); ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ + #endif // defined(XML_DTD) + + static Suite * +@@ -12871,6 +12967,8 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); + tcase_add_test__ifdef_xml_dtd(tc_basic, + 
test_recursive_external_parameter_entity); ++ tcase_add_test__ifdef_xml_dtd(tc_basic, ++ test_recursive_external_parameter_entity_2); + tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); + tcase_add_test(tc_basic, test_suspend_xdecl); + tcase_add_test(tc_basic, test_abort_epilog); +@@ -13120,6 +13218,7 @@ make_suite(void) { + tcase_add_test(tc_accounting, test_accounting_precision); + tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); + tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); ++ tcase_add_test(tc_accounting, test_amplification_isolated_external_parser); + #endif + + return s; diff --git a/expat-2.5.0-CVE-2024-45490.patch b/expat-2.5.0-CVE-2024-45490.patch new file mode 100644 index 0000000..044f6c5 --- /dev/null +++ b/expat-2.5.0-CVE-2024-45490.patch @@ -0,0 +1,129 @@ +commit 05d87eb116ddde35bfa4e4c1d2ec7bcbda38c09b +Author: Tomas Korbar +Date: Wed Sep 11 13:48:58 2024 +0200 + + Fix CVE-2024-45490 + + https://github.com/libexpat/libexpat/pull/890 + +diff --git a/expat/doc/reference.html b/expat/doc/reference.html +index a10f3cb..d618bd8 100644 +--- a/expat/doc/reference.html ++++ b/expat/doc/reference.html +@@ -1098,7 +1098,9 @@ containing part (or perhaps all) of the document. The number of bytes of s + that are part of the document is indicated by len. This means + thatsdoesn't have to be null terminated. It also means that + iflenis larger than the number of bytes in the block of +-memory thatspoints at, then a memory fault is likely. The ++memory thatspoints at, then a memory fault is likely. ++Negative values forlenare rejected since Expat 2.2.1. ++The +isFinalparameter informs the parser that this is the last + piece of the document. Frequently, the last piece is empty (i.e. +lenis zero.) +@@ -1114,11 +1116,17 @@ XML_ParseBuffer(XML_Parser p, + int isFinal); +
+ This is just like XML_Parse,
+ except in this case Expat provides the buffer. By obtaining the
+ buffer from Expat with the XML_GetBuffer function, the application can avoid double
+ copying of the input.
++
++Negative values for len are rejected since Expat 2.6.3.
++
Sets a handler for element declarations in a DTD. The handler gets
+ called with the name of the element in the declaration and a pointer
+ to a structure that contains the element model. It's the user code's
+-responsibility to free model when finished with it. See
++responsibility to free model when finished with it via a call to
+ XML_FreeContentModel.
+ There is no need to free the model from the handler, it can be kept
+ around and freed at a later stage.
++/* Added in Expat 2.7.2. */ ++XML_Bool ++XML_SetAllocTrackerMaximumAmplification(XML_Parser p, ++ float maximumAmplificationFactor); ++++
++ Sets the maximum tolerated amplification factor
++ between direct input and bytes of dynamic memory allocated
++ (default: 100.0)
++ of parser p to maximumAmplificationFactor, and
++ returns XML_TRUE upon success and XML_FALSE upon error.
++
++ Note: ++ There are three types of allocations that intentionally bypass tracking and limiting: ++
++XML_MemMalloc
++ and
++ XML_MemRealloc
++ —
++ healthy use of these two functions continues to be a responsibility
++ of the application using Expat
++ —,
++ XML_GetBuffer
++ and
++ XML_ParseBuffer
++ (and thus also by plain
++ XML_Parse), and
++ XML_FreeContentModel).
++ The amplification factor is calculated as ..
++amplification := allocated / direct++
++ .. while parsing, whereas
++ direct is the number of bytes read from the primary document in parsing and
++ allocated is the number of bytes of dynamic memory allocated in the parser hierarchy.
++
For a call to XML_SetAllocTrackerMaximumAmplification to succeed:
p must be a non-NULL root parser (without any parent parsers) andmaximumAmplificationFactor must be non-NaN and greater than or equal to 1.0.++ Note: ++ If you ever need to increase this value for non-attack payload, ++ please file a bug report. ++
++ ++++ Note: ++ Amplification factors greater than 100 can be observed near the start of parsing ++ even with benign files in practice. ++ ++ So if you do reduce the maximum allowed amplification, ++ please make sure that the activation threshold is still big enough ++ to not end up with undesired false positives (i.e. benign files being rejected). ++ 
++++/* Added in Expat 2.7.2. */ ++XML_Bool ++XML_SetAllocTrackerActivationThreshold(XML_Parser p, ++ unsigned long long activationThresholdBytes); ++++
++ Sets the number of allocated bytes of dynamic memory
++ needed to activate protection against disproportionate use of RAM
++ (default: 64 MiB)
++ of parser p to activationThresholdBytes, and
++ returns XML_TRUE upon success and XML_FALSE upon error.
++
++ Note:
++ For types of allocations that intentionally bypass tracking and limiting, please see
++ XML_SetAllocTrackerMaximumAmplification
++ above.
++
For a call to XML_SetAllocTrackerActivationThreshold to succeed:
p must be a non-NULL root parser (without any parent parsers).++ Note: ++ If you ever need to increase this value for non-attack payload, ++ please file a bug report. ++
+++ /* Added in Expat 2.6.0. */ +diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml +index 3d35393..aeb2dc0 100644 +--- a/expat/doc/xmlwf.xml ++++ b/expat/doc/xmlwf.xml +@@ -152,19 +152,31 @@ supports both. ++ + Sets the maximum tolerated amplification factor +- for protection against billion laughs attacks (default: 100.0). ++ for protection against amplification attacks ++ like the billion laughs attack ++ (default: 100.0 ++ for the sum of direct and indirect output and also ++ for allocations of dynamic memory). + The amplification factor is calculated as .. + ++ amplification := (direct + indirect) / direct + ++- .. while parsing, whereas ++ .. with regard to use of entities and .. ++ ++++ amplification := allocated / direct ++ ++++ .. with regard to dynamic memory while parsing. + <direct> is the number of bytes read +- from the primary document in parsing and ++ from the primary document in parsing, + <indirect> is the number of bytes + added by expanding entities and reading of external DTD files, +- combined. ++ combined, and ++ <allocated> is the total number of bytes of dynamic memory ++ allocated (and not freed) per hierarchy of parsers. + ++ NOTE : +@@ -179,8 +191,10 @@ supports both. ++ + Sets the number of output bytes (including amplification) +- needed to activate protection against billion laughs attacks +- (default: 8 MiB). ++ needed to activate protection against amplification attacks ++ like billion laughs ++ (default: 8 MiB for the sum of direct and indirect output, ++ and 64 MiB for allocations of dynamic memory). + This can be thought of as an "activation threshold". + ++diff --git a/expat/lib/expat.h b/expat/lib/expat.h +index 69b0ba1..6eae1d6 100644 +--- a/expat/lib/expat.h ++++ b/expat/lib/expat.h +@@ -1028,8 +1028,11 @@ enum XML_FeatureEnum { + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. 
*/ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, +- XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT ++ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + /* Additional features must be added to the end of this enum. */ ++ /* Added in Expat 2.7.2. */ ++ XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, ++ XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, + }; + + typedef struct { +@@ -1051,6 +1054,16 @@ XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XMLPARSEAPI(XML_Bool) + XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); ++ ++/* Added in Expat 2.7.2. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, ++ float maximumAmplificationFactor); ++ ++/* Added in Expat 2.7.2. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetAllocTrackerActivationThreshold( ++ XML_Parser parser, unsigned long long activationThresholdBytes); + #endif + + /* Added in Expat 2.6.0. */ +diff --git a/expat/lib/internal.h b/expat/lib/internal.h +index e2709c8..1d1e7da 100644 +--- a/expat/lib/internal.h ++++ b/expat/lib/internal.h +@@ -107,6 +107,7 @@ + #endif + + #include // ULONG_MAX ++#include // size_t + + #if defined(_WIN32) \ + && (! 
defined(__USE_MINGW_ANSI_STDIO) \ +@@ -144,6 +145,16 @@ + 100.0f + #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 ++ ++#define EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f ++#define EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT \ ++ 67108864 // 64 MiB, 2^26 ++ ++// NOTE: If function expat_alloc was user facing, EXPAT_MALLOC_ALIGNMENT would ++// have to take sizeof(long double) into account ++#define EXPAT_MALLOC_ALIGNMENT sizeof(long long) // largest parser (sub)member ++#define EXPAT_MALLOC_PADDING ((EXPAT_MALLOC_ALIGNMENT) - sizeof(size_t)) ++ + /* NOTE END */ + + #include "expat.h" // so we can use type XML_Parser below +@@ -163,6 +174,9 @@ const char *unsignedCharToPrintable(unsigned char c); + + extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c + extern unsigned int g_parseAttempts; // used for testing only ++void *expat_malloc(XML_Parser parser, size_t size, int sourceLine); ++void expat_free(XML_Parser parser, void *ptr, int sourceLine); ++void *expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine); + + #ifdef __cplusplus + } +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index d68d2c8..cba41f4 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -214,7 +214,7 @@ typedef struct { + unsigned char power; + size_t size; + size_t used; +- const XML_Memory_Handling_Suite *mem; ++ XML_Parser parser; + } HASH_TABLE; + + static size_t keylen(KEY s); +@@ -337,7 +337,7 @@ typedef struct { + const XML_Char *end; + XML_Char *ptr; + XML_Char *start; +- const XML_Memory_Handling_Suite *mem; ++ XML_Parser parser; + } STRING_POOL; + + /* The XML_Char before the name is used to determine whether +@@ -432,6 +432,14 @@ typedef struct accounting { + unsigned long long activationThresholdBytes; + } ACCOUNTING; + ++typedef struct MALLOC_TRACKER { ++ XmlBigCount bytesAllocated; ++ XmlBigCount peakBytesAllocated; // updated live only for 
debug level >=2 ++ unsigned long debugLevel; ++ float maximumAmplificationFactor; // >=1.0 ++ XmlBigCount activationThresholdBytes; ++} MALLOC_TRACKER; ++ + typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; +@@ -530,26 +538,23 @@ static XML_Bool setContext(XML_Parser parser, const XML_Char *context); + + static void FASTCALL normalizePublicId(XML_Char *s); + +-static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); ++static DTD *dtdCreate(XML_Parser parser); + /* do not call if m_parentParser != NULL */ +-static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); +-static void dtdDestroy(DTD *p, XML_Bool isDocEntity, +- const XML_Memory_Handling_Suite *ms); ++static void dtdReset(DTD *p, XML_Parser parser); ++static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser); + static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, +- const XML_Memory_Handling_Suite *ms); ++ XML_Parser parser); + static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, + const HASH_TABLE *); + static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, + size_t createSize); +-static void FASTCALL hashTableInit(HASH_TABLE *, +- const XML_Memory_Handling_Suite *ms); ++static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser); + static void FASTCALL hashTableClear(HASH_TABLE *); + static void FASTCALL hashTableDestroy(HASH_TABLE *); + static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); + static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); + +-static void FASTCALL poolInit(STRING_POOL *, +- const XML_Memory_Handling_Suite *ms); ++static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser); + static void FASTCALL poolClear(STRING_POOL *); + static void FASTCALL poolDestroy(STRING_POOL *); + static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, +@@ -569,15 +574,15 @@ static XML_Content *build_model(XML_Parser 
parser); + static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); + +-static XML_Char *copyString(const XML_Char *s, +- const XML_Memory_Handling_Suite *memsuite); ++static XML_Char *copyString(const XML_Char *s, XML_Parser parser); + + static unsigned long generate_hash_secret_salt(XML_Parser parser); + static XML_Bool startParsing(XML_Parser parser); + + static XML_Parser parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, +- const XML_Char *nameSep, DTD *dtd); ++ const XML_Char *nameSep, DTD *dtd, ++ XML_Parser parentParser); + + static void parserInit(XML_Parser parser, const XML_Char *encodingName); + +@@ -734,14 +739,215 @@ struct XML_ParserStruct { + unsigned long m_hash_secret_salt; + #ifdef XML_DTD + ACCOUNTING m_accounting; ++ MALLOC_TRACKER m_alloc_tracker; + ENTITY_STATS m_entity_stats; + #endif + XML_Bool m_reenter; + }; + +-#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +-#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +-#define FREE(parser, p) (parser->m_mem.free_fcn((p))) ++# define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) ++# define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) ++# define FREE(parser, p) (expat_free((parser), (p), __LINE__)) ++ ++static void ++expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, ++ XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { ++ // NOTE: This can be +infinity or -nan ++ const float amplification ++ = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; ++ fprintf( ++ stderr, ++ "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( ++ "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", ++ (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator, ++ absDiff, newTotal, peakTotal, (double)amplification, 
sourceLine); ++} ++ ++static bool ++expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, ++ int sourceLine) { ++ assert(rootParser != NULL); ++ assert(increase > 0); ++ ++ XmlBigCount newTotal = 0; ++ bool tolerable = true; ++ ++ // Detect integer overflow ++ if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { ++ tolerable = false; ++ } else { ++ newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; ++ ++ if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { ++ assert(newTotal > 0); ++ // NOTE: This can be +infinity when dividing by zero but not -nan ++ const float amplification ++ = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; ++ if (amplification ++ > rootParser->m_alloc_tracker.maximumAmplificationFactor) { ++ tolerable = false; ++ } ++ } ++ } ++ ++ if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { ++ expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); ++ } ++ ++ return tolerable; ++} ++ ++void *expat_malloc(XML_Parser parser, size_t size, int sourceLine) { ++ // Detect integer overflow ++ if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { ++ return NULL; ++ } ++ ++ const XML_Parser rootParser = getRootParserOf(parser, NULL); ++ assert(rootParser->m_parentParser == NULL); ++ ++ const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; ++ ++ if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated ++ < bytesToAllocate) { ++ return NULL; // i.e. signal integer overflow as out-of-memory ++ } ++ ++ if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, ++ sourceLine)) { ++ return NULL; // i.e. 
signal violation as out-of-memory ++ } ++ ++ // Actually allocate ++ void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); ++ ++ if (mallocedPtr == NULL) { ++ return NULL; ++ } ++ ++ // Update in-block recorded size ++ *(size_t *)mallocedPtr = size; ++ ++ // Update accounting ++ rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; ++ ++ // Report as needed ++ if (rootParser->m_alloc_tracker.debugLevel >= 2) { ++ if (rootParser->m_alloc_tracker.bytesAllocated ++ > rootParser->m_alloc_tracker.peakBytesAllocated) { ++ rootParser->m_alloc_tracker.peakBytesAllocated ++ = rootParser->m_alloc_tracker.bytesAllocated; ++ } ++ expat_heap_stat(rootParser, '+', bytesToAllocate, ++ rootParser->m_alloc_tracker.bytesAllocated, ++ rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); ++ } ++ ++ return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; ++} ++ ++void expat_free(XML_Parser parser, void *ptr, int sourceLine) { ++ assert(parser != NULL); ++ ++ if (ptr == NULL) { ++ return; ++ } ++ ++ const XML_Parser rootParser = getRootParserOf(parser, NULL); ++ assert(rootParser->m_parentParser == NULL); ++ ++ // Extract size (to the eyes of malloc_fcn/realloc_fcn) and ++ // the original pointer returned by malloc/realloc ++ void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); ++ const size_t bytesAllocated ++ = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr; ++ ++ // Update accounting ++ assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); ++ rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; ++ ++ // Report as needed ++ if (rootParser->m_alloc_tracker.debugLevel >= 2) { ++ expat_heap_stat(rootParser, '-', bytesAllocated, ++ rootParser->m_alloc_tracker.bytesAllocated, ++ rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); ++ } ++ ++ // NOTE: This may be freeing rootParser, so freeing has to come last ++ parser->m_mem.free_fcn(mallocedPtr); ++} ++ ++void 
*expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { ++ assert(parser != NULL); ++ ++ if (ptr == NULL) { ++ return expat_malloc(parser, size, sourceLine); ++ } ++ ++ if (size == 0) { ++ expat_free(parser, ptr, sourceLine); ++ return NULL; ++ } ++ ++ const XML_Parser rootParser = getRootParserOf(parser, NULL); ++ assert(rootParser->m_parentParser == NULL); ++ ++ // Extract original size (to the eyes of the caller) and the original ++ // pointer returned by malloc/realloc ++ void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); ++ const size_t prevSize = *(size_t *)mallocedPtr; ++ ++ // Classify upcoming change ++ const bool isIncrease = (size > prevSize); ++ const size_t absDiff ++ = (size > prevSize) ? (size - prevSize) : (prevSize - size); ++ ++ // Ask for permission from accounting ++ if (isIncrease) { ++ if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { ++ return NULL; // i.e. signal violation as out-of-memory ++ } ++ } ++ ++ // NOTE: Integer overflow detection has already been done for us ++ // by expat_heap_increase_tolerable(..) above ++ assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); ++ ++ // Actually allocate ++ mallocedPtr = parser->m_mem.realloc_fcn( ++ mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); ++ ++ if (mallocedPtr == NULL) { ++ return NULL; ++ } ++ ++ // Update accounting ++ if (isIncrease) { ++ assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated ++ >= absDiff); ++ rootParser->m_alloc_tracker.bytesAllocated += absDiff; ++ } else { // i.e. 
decrease ++ assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); ++ rootParser->m_alloc_tracker.bytesAllocated -= absDiff; ++ } ++ ++ // Report as needed ++ if (rootParser->m_alloc_tracker.debugLevel >= 2) { ++ if (rootParser->m_alloc_tracker.bytesAllocated ++ > rootParser->m_alloc_tracker.peakBytesAllocated) { ++ rootParser->m_alloc_tracker.peakBytesAllocated ++ = rootParser->m_alloc_tracker.bytesAllocated; ++ } ++ expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff, ++ rootParser->m_alloc_tracker.bytesAllocated, ++ rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); ++ } ++ ++ // Update in-block recorded size ++ *(size_t *)mallocedPtr = size; ++ ++ return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; ++} + + XML_Parser XMLCALL + XML_ParserCreate(const XML_Char *encodingName) { +@@ -1059,19 +1265,34 @@ XML_Parser XMLCALL + XML_ParserCreate_MM(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep) { +- return parserCreate(encodingName, memsuite, nameSep, NULL); ++ return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); + } + + static XML_Parser + parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, +- DTD *dtd) { +- XML_Parser parser; ++ DTD *dtd, XML_Parser parentParser) { ++ XML_Parser parser = NULL; ++ ++ const size_t increase ++ = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct); ++ ++ if (parentParser != NULL) { ++ const XML_Parser rootParser = getRootParserOf(parentParser, NULL); ++ if (! 
expat_heap_increase_tolerable(rootParser, increase, __LINE__)) { ++ return NULL; ++ } ++ } + + if (memsuite) { + XML_Memory_Handling_Suite *mtemp; +- parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); +- if (parser != NULL) { ++ void *const sizeAndParser ++ = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING ++ + sizeof(struct XML_ParserStruct)); ++ if (sizeAndParser != NULL) { ++ *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); ++ parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) ++ + EXPAT_MALLOC_PADDING); + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = memsuite->malloc_fcn; + mtemp->realloc_fcn = memsuite->realloc_fcn; +@@ -1079,18 +1300,59 @@ parserCreate(const XML_Char *encodingName, + } + } else { + XML_Memory_Handling_Suite *mtemp; +- parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); +- if (parser != NULL) { ++ void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING ++ + sizeof(struct XML_ParserStruct)); ++ if (sizeAndParser != NULL) { ++ *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); ++ parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) ++ + EXPAT_MALLOC_PADDING); + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = malloc; + mtemp->realloc_fcn = realloc; + mtemp->free_fcn = free; + } +- } ++ } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0 + + if (! 
parser) + return parser; + ++ // Initialize .m_alloc_tracker ++ memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER)); ++ if (parentParser == NULL) { ++ parser->m_alloc_tracker.debugLevel ++ = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u); ++ parser->m_alloc_tracker.maximumAmplificationFactor ++ = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT; ++ parser->m_alloc_tracker.activationThresholdBytes ++ = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT; ++ ++ // NOTE: This initialization needs to come this early because these fields ++ // are read by allocation tracking code ++ parser->m_parentParser = NULL; ++ parser->m_accounting.countBytesDirect = 0; ++ } else { ++ parser->m_parentParser = parentParser; ++ } ++ ++ // Record XML_ParserStruct allocation we did a few lines up before ++ const XML_Parser rootParser = getRootParserOf(parser, NULL); ++ assert(rootParser->m_parentParser == NULL); ++ assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase); ++ rootParser->m_alloc_tracker.bytesAllocated += increase; ++ ++ // Report on allocation ++ if (rootParser->m_alloc_tracker.debugLevel >= 2) { ++ if (rootParser->m_alloc_tracker.bytesAllocated ++ > rootParser->m_alloc_tracker.peakBytesAllocated) { ++ rootParser->m_alloc_tracker.peakBytesAllocated ++ = rootParser->m_alloc_tracker.bytesAllocated; ++ } ++ ++ expat_heap_stat(rootParser, '+', increase, ++ rootParser->m_alloc_tracker.bytesAllocated, ++ rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__); ++ } ++ + parser->m_buffer = NULL; + parser->m_bufferLim = NULL; + +@@ -1125,7 +1387,7 @@ parserCreate(const XML_Char *encodingName, + if (dtd) + parser->m_dtd = dtd; + else { +- parser->m_dtd = dtdCreate(&parser->m_mem); ++ parser->m_dtd = dtdCreate(parser); + if (parser->m_dtd == NULL) { + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_atts); +@@ -1159,8 +1421,8 @@ parserCreate(const XML_Char *encodingName, + + parser->m_protocolEncodingName = NULL; + +- poolInit(&parser->m_tempPool, 
&(parser->m_mem)); +- poolInit(&parser->m_temp2Pool, &(parser->m_mem)); ++ poolInit(&parser->m_tempPool, parser); ++ poolInit(&parser->m_temp2Pool, parser); + parserInit(parser, encodingName); + + if (encodingName && ! parser->m_protocolEncodingName) { +@@ -1192,7 +1454,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_processor = prologInitProcessor; + XmlPrologStateInit(&parser->m_prologState); + if (encodingName != NULL) { +- parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); ++ parser->m_protocolEncodingName = copyString(encodingName, parser); + } + parser->m_curBase = NULL; + XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); +@@ -1254,7 +1516,6 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_unknownEncodingMem = NULL; + parser->m_unknownEncodingRelease = NULL; + parser->m_unknownEncodingData = NULL; +- parser->m_parentParser = NULL; + parser->m_parsingStatus.parsing = XML_INITIALIZED; + // Reentry can only be triggered inside m_processor calls + parser->m_reenter = XML_FALSE; +@@ -1344,7 +1605,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { + FREE(parser, (void *)parser->m_protocolEncodingName); + parser->m_protocolEncodingName = NULL; + parserInit(parser, encodingName); +- dtdReset(parser->m_dtd, &parser->m_mem); ++ dtdReset(parser->m_dtd, parser); + return XML_TRUE; + } + +@@ -1380,7 +1641,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + parser->m_protocolEncodingName = NULL; + else { + /* Copy the new encoding name into allocated memory */ +- parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); ++ parser->m_protocolEncodingName = copyString(encodingName, parser); + if (! 
parser->m_protocolEncodingName) + return XML_STATUS_ERROR; + } +@@ -1489,9 +1750,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + */ + if (parser->m_ns) { + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; +- parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); ++ parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); + } else { +- parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); ++ parser ++ = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); + } + + if (! parser) +@@ -1535,7 +1797,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + parser->m_prologState.inEntityValue = oldInEntityValue; + if (context) { + #endif /* XML_DTD */ +- if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) ++ if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser) + || ! setContext(parser, context)) { + XML_ParserFree(parser); + return NULL; +@@ -1647,14 +1909,16 @@ XML_ParserFree(XML_Parser parser) { + #else + if (parser->m_dtd) + #endif /* XML_DTD */ +- dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, +- &parser->m_mem); ++ dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser); + FREE(parser, (void *)parser->m_atts); + #ifdef XML_ATTR_INFO + FREE(parser, (void *)parser->m_attInfo); + #endif + FREE(parser, parser->m_groupConnector); +- FREE(parser, parser->m_buffer); ++ // NOTE: We are avoiding FREE(..) here because parser->m_buffer ++ // is not being allocated with MALLOC(..) but with plain ++ // .malloc_fcn(..). ++ parser->m_mem.free_fcn(parser->m_buffer); + FREE(parser, parser->m_dataBuf); + FREE(parser, parser->m_nsAtts); + FREE(parser, parser->m_unknownEncodingMem); +@@ -2246,7 +2510,9 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +- newBuf = (char *)MALLOC(parser, bufferSize); ++ // NOTE: We are avoiding MALLOC(..) 
here to leave limiting ++ // the input size to the application using Expat. ++ newBuf = (char *)parser->m_mem.malloc_fcn(bufferSize); + if (newBuf == 0) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; +@@ -2257,7 +2523,10 @@ XML_GetBuffer(XML_Parser parser, int len) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + + keep); +- FREE(parser, parser->m_buffer); ++ // NOTE: We are avoiding FREE(..) here because parser->m_buffer ++ // is not being allocated with MALLOC(..) but with plain ++ // .malloc_fcn(..). ++ parser->m_mem.free_fcn(parser->m_buffer); + parser->m_buffer = newBuf; + parser->m_bufferEnd + = parser->m_buffer +@@ -2273,7 +2542,10 @@ XML_GetBuffer(XML_Parser parser, int len) { + if (parser->m_bufferPtr) { + memcpy(newBuf, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); +- FREE(parser, parser->m_buffer); ++ // NOTE: We are avoiding FREE(..) here because parser->m_buffer ++ // is not being allocated with MALLOC(..) but with plain ++ // .malloc_fcn(..). ++ parser->m_mem.free_fcn(parser->m_buffer); + parser->m_bufferEnd + = newBuf + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); +@@ -2451,28 +2723,43 @@ XML_GetCurrentColumnNumber(XML_Parser parser) { + + void XMLCALL + XML_FreeContentModel(XML_Parser parser, XML_Content *model) { +- if (parser != NULL) +- FREE(parser, model); ++ if (parser == NULL) ++ return; ++ ++ // NOTE: We are avoiding FREE(..) here because the content model ++ // has been created using plain .malloc_fcn(..) rather than MALLOC(..). ++ parser->m_mem.free_fcn(model); + } + + void *XMLCALL + XML_MemMalloc(XML_Parser parser, size_t size) { + if (parser == NULL) + return NULL; +- return MALLOC(parser, size); ++ ++ // NOTE: We are avoiding MALLOC(..) here to not include ++ // user allocations with allocation tracking and limiting. 
++ return parser->m_mem.malloc_fcn(size); + } + + void *XMLCALL + XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { + if (parser == NULL) + return NULL; +- return REALLOC(parser, ptr, size); ++ ++ // NOTE: We are avoiding REALLOC(..) here to not include ++ // user allocations with allocation tracking and limiting. ++ return parser->m_mem.realloc_fcn(ptr, size); + } + + void XMLCALL + XML_MemFree(XML_Parser parser, void *ptr) { +- if (parser != NULL) +- FREE(parser, ptr); ++ if (parser == NULL) ++ return; ++ ++ // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and ++ // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) ++ // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. ++ parser->m_mem.free_fcn(ptr); + } + + void XMLCALL +@@ -2669,6 +2956,13 @@ XML_GetFeatureList(void) { + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, ++ /* Added in Expat 2.7.2. 
*/ ++ {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, ++ XML_L("XML_AT_MAX_AMP"), ++ (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT}, ++ {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, ++ XML_L("XML_AT_ACT_THRES"), ++ (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT}, + #endif + {XML_FEATURE_END, NULL, 0}}; + +@@ -2697,6 +2991,29 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; + } ++ ++XML_Bool XMLCALL ++XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, ++ float maximumAmplificationFactor) { ++ if ((parser == NULL) || (parser->m_parentParser != NULL) ++ || isnan(maximumAmplificationFactor) ++ || (maximumAmplificationFactor < 1.0f)) { ++ return XML_FALSE; ++ } ++ parser->m_alloc_tracker.maximumAmplificationFactor ++ = maximumAmplificationFactor; ++ return XML_TRUE; ++} ++ ++XML_Bool XMLCALL ++XML_SetAllocTrackerActivationThreshold( ++ XML_Parser parser, unsigned long long activationThresholdBytes) { ++ if ((parser == NULL) || (parser->m_parentParser != NULL)) { ++ return XML_FALSE; ++ } ++ parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; ++ return XML_TRUE; ++} + #endif /* XML_DTD */ + + XML_Bool XMLCALL +@@ -5652,8 +5969,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + case XML_ROLE_CONTENT_EMPTY: + if (dtd->in_eldecl) { + if (parser->m_elementDeclHandler) { ++ // NOTE: We are avoiding MALLOC(..) here to so that ++ // applications that are not using XML_FreeContentModel but ++ // plain free(..) or .free_fcn() to free the content model's ++ // memory are safe. + XML_Content *content +- = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); ++ = (XML_Content *)parser->m_mem.malloc_fcn(sizeof(XML_Content)); + if (! 
content) + return XML_ERROR_NO_MEMORY; + content->quant = XML_CQUANT_NONE; +@@ -7009,19 +7330,19 @@ normalizePublicId(XML_Char *publicId) { + } + + static DTD * +-dtdCreate(const XML_Memory_Handling_Suite *ms) { +- DTD *p = ms->malloc_fcn(sizeof(DTD)); ++dtdCreate(XML_Parser parser) { ++ DTD *p = MALLOC(parser, sizeof(DTD)); + if (p == NULL) + return p; +- poolInit(&(p->pool), ms); +- poolInit(&(p->entityValuePool), ms); +- hashTableInit(&(p->generalEntities), ms); +- hashTableInit(&(p->elementTypes), ms); +- hashTableInit(&(p->attributeIds), ms); +- hashTableInit(&(p->prefixes), ms); ++ poolInit(&(p->pool), parser); ++ poolInit(&(p->entityValuePool), parser); ++ hashTableInit(&(p->generalEntities), parser); ++ hashTableInit(&(p->elementTypes), parser); ++ hashTableInit(&(p->attributeIds), parser); ++ hashTableInit(&(p->prefixes), parser); + #ifdef XML_DTD + p->paramEntityRead = XML_FALSE; +- hashTableInit(&(p->paramEntities), ms); ++ hashTableInit(&(p->paramEntities), parser); + #endif /* XML_DTD */ + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; +@@ -7041,7 +7362,7 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) { + } + + static void +-dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { ++dtdReset(DTD *p, XML_Parser parser) { + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &(p->elementTypes)); + for (;;) { +@@ -7049,7 +7370,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { + if (! 
e) + break; + if (e->allocDefaultAtts != 0) +- ms->free_fcn(e->defaultAtts); ++ FREE(parser, e->defaultAtts); + } + hashTableClear(&(p->generalEntities)); + #ifdef XML_DTD +@@ -7066,9 +7387,9 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { + + p->in_eldecl = XML_FALSE; + +- ms->free_fcn(p->scaffIndex); ++ FREE(parser, p->scaffIndex); + p->scaffIndex = NULL; +- ms->free_fcn(p->scaffold); ++ FREE(parser, p->scaffold); + p->scaffold = NULL; + + p->scaffLevel = 0; +@@ -7082,7 +7403,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { + } + + static void +-dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { ++dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &(p->elementTypes)); + for (;;) { +@@ -7090,7 +7411,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { + if (! e) + break; + if (e->allocDefaultAtts != 0) +- ms->free_fcn(e->defaultAtts); ++ FREE(parser, e->defaultAtts); + } + hashTableDestroy(&(p->generalEntities)); + #ifdef XML_DTD +@@ -7102,10 +7423,10 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { + poolDestroy(&(p->pool)); + poolDestroy(&(p->entityValuePool)); + if (isDocEntity) { +- ms->free_fcn(p->scaffIndex); +- ms->free_fcn(p->scaffold); ++ FREE(parser, p->scaffIndex); ++ FREE(parser, p->scaffold); + } +- ms->free_fcn(p); ++ FREE(parser, p); + } + + /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. +@@ -7113,7 +7434,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { + */ + static int + dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, +- const XML_Memory_Handling_Suite *ms) { ++ XML_Parser parser) { + HASH_TABLE_ITER iter; + + /* Copy the prefix table. 
*/ +@@ -7194,7 +7515,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, + } + #endif + newE->defaultAtts +- = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); ++ = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (! newE->defaultAtts) { + return 0; + } +@@ -7356,7 +7677,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { + /* table->size is a power of 2 */ + table->size = (size_t)1 << INIT_POWER; + tsize = table->size * sizeof(NAMED *); +- table->v = table->mem->malloc_fcn(tsize); ++ table->v = MALLOC(table->parser, tsize); + if (! table->v) { + table->size = 0; + return NULL; +@@ -7396,7 +7717,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { + } + + size_t tsize = newSize * sizeof(NAMED *); +- NAMED **newV = table->mem->malloc_fcn(tsize); ++ NAMED **newV = MALLOC(table->parser, tsize); + if (! newV) + return NULL; + memset(newV, 0, tsize); +@@ -7412,7 +7733,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { + } + newV[j] = table->v[i]; + } +- table->mem->free_fcn(table->v); ++ FREE(table->parser, table->v); + table->v = newV; + table->power = newPower; + table->size = newSize; +@@ -7425,7 +7746,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { + } + } + } +- table->v[i] = table->mem->malloc_fcn(createSize); ++ table->v[i] = MALLOC(table->parser, createSize); + if (! 
table->v[i]) + return NULL; + memset(table->v[i], 0, createSize); +@@ -7438,7 +7759,7 @@ static void FASTCALL + hashTableClear(HASH_TABLE *table) { + size_t i; + for (i = 0; i < table->size; i++) { +- table->mem->free_fcn(table->v[i]); ++ FREE(table->parser, table->v[i]); + table->v[i] = NULL; + } + table->used = 0; +@@ -7448,17 +7769,17 @@ static void FASTCALL + hashTableDestroy(HASH_TABLE *table) { + size_t i; + for (i = 0; i < table->size; i++) +- table->mem->free_fcn(table->v[i]); +- table->mem->free_fcn(table->v); ++ FREE(table->parser, table->v[i]); ++ FREE(table->parser, table->v); + } + + static void FASTCALL +-hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { ++hashTableInit(HASH_TABLE *p, XML_Parser parser) { + p->power = 0; + p->size = 0; + p->used = 0; + p->v = NULL; +- p->mem = ms; ++ p->parser = parser; + } + + static void FASTCALL +@@ -7478,13 +7799,13 @@ hashTableIterNext(HASH_TABLE_ITER *iter) { + } + + static void FASTCALL +-poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { ++poolInit(STRING_POOL *pool, XML_Parser parser) { + pool->blocks = NULL; + pool->freeBlocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; +- pool->mem = ms; ++ pool->parser = parser; + } + + static void FASTCALL +@@ -7511,13 +7832,13 @@ poolDestroy(STRING_POOL *pool) { + BLOCK *p = pool->blocks; + while (p) { + BLOCK *tem = p->next; +- pool->mem->free_fcn(p); ++ FREE(pool->parser, p); + p = tem; + } + p = pool->freeBlocks; + while (p) { + BLOCK *tem = p->next; +- pool->mem->free_fcn(p); ++ FREE(pool->parser, p); + p = tem; + } + } +@@ -7672,8 +7993,8 @@ poolGrow(STRING_POOL *pool) { + if (bytesToAllocate == 0) + return XML_FALSE; + +- temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, +- (unsigned)bytesToAllocate); ++ temp = (BLOCK *)REALLOC(pool->parser, pool->blocks, ++ (unsigned)bytesToAllocate); + if (temp == NULL) + return XML_FALSE; + pool->blocks = temp; +@@ -7713,7 +8034,7 @@ poolGrow(STRING_POOL *pool) { + 
if (bytesToAllocate == 0) + return XML_FALSE; + +- tem = pool->mem->malloc_fcn(bytesToAllocate); ++ tem = MALLOC(pool->parser, bytesToAllocate); + if (! tem) + return XML_FALSE; + tem->size = blockSize; +@@ -7828,7 +8149,10 @@ build_model(XML_Parser parser) { + const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); + +- ret = (XML_Content *)MALLOC(parser, allocsize); ++ // NOTE: We are avoiding MALLOC(..) here to so that ++ // applications that are not using XML_FreeContentModel but plain ++ // free(..) or .free_fcn() to free the content model's memory are safe. ++ ret = (XML_Content *)parser->m_mem.malloc_fcn(allocsize); + if (! ret) + return NULL; + +@@ -7949,7 +8273,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, + } + + static XML_Char * +-copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { ++copyString(const XML_Char *s, XML_Parser parser) { + size_t charsRequired = 0; + XML_Char *result; + +@@ -7961,7 +8285,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { + charsRequired++; + + /* Now allocate space for the copy */ +- result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); ++ result = MALLOC(parser, charsRequired * sizeof(XML_Char)); + if (result == NULL) + return NULL; + /* Copy the original into place */ +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index 5db384d..4a0a2e3 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -51,6 +51,9 @@ + #include // for SIZE_MAX + #include + #include ++#include /* NAN, INFINITY */ ++#include ++#include /* for SIZE_MAX */ + #include + #include /* ptrdiff_t */ + #include +@@ -62,7 +65,10 @@ + # include + #endif + ++#include "expat_config.h" ++ + #include "expat.h" ++#include "internal.h" + #include "chardata.h" + #include "structdata.h" + #include "internal.h" +@@ -11420,6 +11426,209 @@ 
START_TEST(test_alloc_reset_after_external_entity_parser_create_fail) { + } + END_TEST + ++static size_t ++sizeRecordedFor(void *ptr) { ++ return *(size_t *)((char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t)); ++} ++ ++START_TEST(test_alloc_tracker_size_recorded) { ++ XML_Memory_Handling_Suite memsuite = {malloc, realloc, free}; ++ ++ bool values[] = {true, false}; ++ for (size_t i = 0; i < sizeof(values) / sizeof(values[0]); i++) { ++ const bool useMemSuite = values[i]; ++ XML_Parser parser = useMemSuite ++ ? XML_ParserCreate_MM(NULL, &memsuite, XCS("|")) ++ : XML_ParserCreate(NULL); ++ ++ void *ptr = expat_malloc(parser, 10, -1); ++ ++ assert_true(ptr != NULL); ++ assert_true(sizeRecordedFor(ptr) == 10); ++ ++ assert_true(expat_realloc(parser, ptr, SIZE_MAX / 2, -1) == NULL); ++ ++ assert_true(sizeRecordedFor(ptr) == 10); // i.e. unchanged ++ ++ ptr = expat_realloc(parser, ptr, 20, -1); ++ ++ assert_true(ptr != NULL); ++ assert_true(sizeRecordedFor(ptr) == 20); ++ ++ expat_free(parser, ptr, -1); ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_alloc_tracker_pointer_alignment) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(sizeof(long long) >= sizeof(size_t)); // self-test ++ long long *const ptr ++ = (long long *)expat_malloc(parser, 4 * sizeof(long long), -1); ++ ptr[0] = 0LL; ++ ptr[1] = 1LL; ++ ptr[2] = 2LL; ++ ptr[3] = 3LL; ++ expat_free(parser, ptr, -1); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_alloc_tracker_maximum_amplification) { ++ if (g_reparseDeferralEnabledDefault == XML_TRUE) { ++ return; ++ } ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ // Get .m_accounting.countBytesDirect from 0 to 3 ++ const char *const chunk = " "; ++ assert_true(_XML_Parse_SINGLE_BYTES(parser, chunk, (int)strlen(chunk), ++ /*isFinal=*/XML_FALSE) ++ == XML_STATUS_OK); ++ ++ // Stop activation threshold from interfering ++ assert_true(XML_SetAllocTrackerActivationThreshold(parser, 0) == XML_TRUE); ++ ++ // 
Exceed maximum amplification: should be rejected. ++ assert_true(expat_malloc(parser, 1000, -1) == NULL); ++ ++ // Increase maximum amplification, and try the same amount once more: should ++ // work. ++ assert_true(XML_SetAllocTrackerMaximumAmplification(parser, 3000.0f) ++ == XML_TRUE); ++ ++ void *const ptr = expat_malloc(parser, 1000, -1); ++ assert_true(ptr != NULL); ++ expat_free(parser, ptr, -1); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_alloc_tracker_threshold) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ // Exceed maximum amplification *before* (default) threshold: should work. ++ void *const ptr = expat_malloc(parser, 1000, -1); ++ assert_true(ptr != NULL); ++ expat_free(parser, ptr, -1); ++ ++ // Exceed maximum amplification *after* threshold: should be rejected. ++ assert_true(XML_SetAllocTrackerActivationThreshold(parser, 999) == XML_TRUE); ++ assert_true(expat_malloc(parser, 1000, -1) == NULL); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_alloc_tracker_getbuffer_unlimited) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ // Artificially lower threshold ++ assert_true(XML_SetAllocTrackerActivationThreshold(parser, 0) == XML_TRUE); ++ ++ // Self-test: Prove that threshold is as rejecting as expected ++ assert_true(expat_malloc(parser, 1000, -1) == NULL); ++ // XML_GetBuffer should be allowed to pass, though ++ assert_true(XML_GetBuffer(parser, 1000) != NULL); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_alloc_tracker_api) { ++ XML_Parser parserWithoutParent = XML_ParserCreate(NULL); ++ XML_Parser parserWithParent = XML_ExternalEntityParserCreate( ++ parserWithoutParent, XCS("entity123"), NULL); ++ if (parserWithoutParent == NULL) ++ fail("parserWithoutParent is NULL"); ++ if (parserWithParent == NULL) ++ fail("parserWithParent is NULL"); ++ ++ // XML_SetAllocTrackerMaximumAmplification, error cases ++ if (XML_SetAllocTrackerMaximumAmplification(NULL, 123.0f) == XML_TRUE) 
++ fail("Call with NULL parser is NOT supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithParent, 123.0f) ++ == XML_TRUE) ++ fail("Call with non-root parser is NOT supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, NAN) ++ == XML_TRUE) ++ fail("Call with NaN limit is NOT supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, -1.0f) ++ == XML_TRUE) ++ fail("Call with negative limit is NOT supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, 0.9f) ++ == XML_TRUE) ++ fail("Call with positive limit <1.0 is NOT supposed to succeed"); ++ ++ // XML_SetAllocTrackerMaximumAmplification, success cases ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, 1.0f) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, 123456.789f) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ if (XML_SetAllocTrackerMaximumAmplification(parserWithoutParent, INFINITY) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ ++ // XML_SetAllocTrackerActivationThreshold, error cases ++ if (XML_SetAllocTrackerActivationThreshold(NULL, 123) == XML_TRUE) ++ fail("Call with NULL parser is NOT supposed to succeed"); ++ if (XML_SetAllocTrackerActivationThreshold(parserWithParent, 123) == XML_TRUE) ++ fail("Call with non-root parser is NOT supposed to succeed"); ++ ++ // XML_SetAllocTrackerActivationThreshold, success cases ++ if (XML_SetAllocTrackerActivationThreshold(parserWithoutParent, 123) ++ == XML_FALSE) ++ fail("Call with non-NULL parentless parser is supposed to succeed"); ++ ++ XML_ParserFree(parserWithParent); ++ XML_ParserFree(parserWithoutParent); ++} ++END_TEST ++ ++START_TEST(test_mem_api_cycle) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ void *ptr = 
XML_MemMalloc(parser, 10); ++ ++ assert_true(ptr != NULL); ++ memset(ptr, 'x', 10); // assert writability, with ASan in mind ++ ++ ptr = XML_MemRealloc(parser, ptr, 20); ++ ++ assert_true(ptr != NULL); ++ memset(ptr, 'y', 20); // assert writability, with ASan in mind ++ ++ XML_MemFree(parser, ptr); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_mem_api_unlimited) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ assert_true(XML_SetAllocTrackerActivationThreshold(parser, 0) == XML_TRUE); ++ ++ void *ptr = XML_MemMalloc(parser, 1000); ++ ++ assert_true(ptr != NULL); ++ ++ ptr = XML_MemRealloc(parser, ptr, 2000); ++ ++ assert_true(ptr != NULL); ++ ++ XML_MemFree(parser, ptr); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ + static void + nsalloc_setup(void) { + XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free}; +@@ -11820,10 +12029,8 @@ START_TEST(test_nsalloc_realloc_attributes) { + nsalloc_teardown(); + nsalloc_setup(); + } +- if (i == 0) +- fail("Parsing worked despite failing reallocations"); +- else if (i == max_realloc_count) +- fail("Parsing failed at max reallocation count"); ++ assert_true( ++ i == 0); // because expat_realloc relies on expat_malloc to some extent + } + END_TEST + +@@ -13694,6 +13901,18 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd( + tc_alloc, test_alloc_reset_after_external_entity_parser_create_fail); + ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_tracker_size_recorded); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_tracker_pointer_alignment); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, ++ test_alloc_tracker_maximum_amplification); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_tracker_threshold); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, ++ test_alloc_tracker_getbuffer_unlimited); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_tracker_api); ++ ++ tcase_add_test(tc_alloc, test_mem_api_cycle); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, 
test_mem_api_unlimited); ++ + suite_add_tcase(s, tc_nsalloc); + tcase_add_checked_fixture(tc_nsalloc, nsalloc_setup, nsalloc_teardown); + tcase_add_test(tc_nsalloc, test_nsalloc_xmlns); +diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c +index 7c62919..99622df 100644 +--- a/expat/xmlwf/xmlwf.c ++++ b/expat/xmlwf/xmlwf.c +@@ -908,11 +908,11 @@ usage(const XML_Char *prog, int rc) { + T(" -t write no XML output for [t]iming of plain parsing\n") + T(" -N enable adding doctype and [n]otation declarations\n") + T("\n") +- T("billion laughs attack protection:\n") ++ T("amplification attack protection (e.g. billion laughs):\n") + T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n") + T("\n") + T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") +- T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") ++ T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n") + T("\n") + T("reparse deferral:\n") + T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n") +@@ -921,6 +921,9 @@ usage(const XML_Char *prog, int rc) { + T(" -h show this [h]elp message and exit\n") + T(" -v show program's [v]ersion number and exit\n") + T("\n") ++ T("environment variables:\n") ++ T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n") ++ T(" Control verbosity of allocation tracker (default: 0)\n") + T("exit status:\n") + T(" 0 the input files are well-formed and the output (if requested) was written successfully\n") + T(" 1 could not allocate data structures, signals a serious problem with execution environment\n") +@@ -1133,12 +1136,15 @@ tmain(int argc, XML_Char **argv) { + #ifdef XML_DTD + XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parser, attackMaximumAmplification); ++ XML_SetAllocTrackerMaximumAmplification(parser, ++ attackMaximumAmplification); + #endif + } + if (attackThresholdGiven) { + #ifdef XML_DTD + 
XML_SetBillionLaughsAttackProtectionActivationThreshold( + parser, attackThresholdBytes); ++ XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes); + #else + (void)attackThresholdBytes; // silence -Wunused-but-set-variable + #endif +diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py +index 1bd0a0a..1bacd60 100755 +--- a/expat/xmlwf/xmlwf_helpgen.py ++++ b/expat/xmlwf/xmlwf_helpgen.py +@@ -32,6 +32,10 @@ + import argparse + + epilog = """ ++environment variables: ++ EXPAT_MALLOC_DEBUG=(0|1|2) ++ Control verbosity of allocation tracker (default: 0) ++ + exit status: + 0 the input files are well-formed and the output (if requested) was written successfully + 1 could not allocate data structures, signals a serious problem with execution environment +@@ -73,13 +77,13 @@ output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not + output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing') + output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations') + +-billion_laughs = parser.add_argument_group('billion laughs attack protection', ++billion_laughs = parser.add_argument_group('amplification attack protection (e.g. 
billion laughs)', + description='NOTE: ' + 'If you ever need to increase these values ' + 'for non-attack payload, please file a bug report.') + billion_laughs.add_argument('-a', metavar='FACTOR', + help='set maximum tolerated [a]mplification factor (default: 100.0)') +-billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)') ++billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)') + + reparse_deferral = parser.add_argument_group('reparse deferral') + reparse_deferral.add_argument('-q', metavar='FACTOR', diff --git a/mingw-expat.spec b/mingw-expat.spec index ba9a4d6..abd4d6b 100644 --- a/mingw-expat.spec +++ b/mingw-expat.spec @@ -1,18 +1,20 @@ %{?mingw_package_header} +%global unversion 2_5_0 Name: mingw-expat -Version: 2.2.4 -Release: 5%{?dist} +Version: %(echo %{unversion} | sed 's/_/./g') +Release: 1%{?dist} Summary: MinGW Windows port of expat XML parser library License: MIT -URL: http://www.libexpat.org/ -Source0: http://downloads.sourceforge.net/expat/expat-%{version}.tar.bz2 -Patch1: expat-2.2.5-CVE-2018-20843.patch +URL: https://libexpat.github.io/ +Source0: https://github.com/libexpat/libexpat/archive/R_%{unversion}.tar.gz#/expat-%{version}.tar.gz BuildArch: noarch ExclusiveArch: %{ix86} x86_64 +BuildRequires: autoconf, automake, libtool, xmlto, gcc-c++, docbook2X +BuildRequires: make BuildRequires: mingw32-filesystem >= 95 BuildRequires: mingw32-gcc BuildRequires: mingw32-binutils @@ -21,6 +23,14 @@ BuildRequires: mingw64-filesystem >= 95 BuildRequires: mingw64-gcc BuildRequires: mingw64-binutils +Patch0: expat-2.5.0-CVE-2023-52425.patch +Patch1: expat-2.5.0-CVE-2024-28757.patch +Patch2: expat-2.5.0-CVE-2024-45490.patch +Patch3: expat-2.5.0-CVE-2024-45491.patch +Patch4: expat-2.5.0-CVE-2024-45492.patch +Patch5: expat-2.5.0-CVE-2024-50602.patch +Patch6: expat-2.5.0-CVE-2024-8176.patch +Patch7: 
expat-2.5.0-CVE-2025-59375.patch %description This is expat, the C library for parsing XML, written by James Clark. Expat @@ -73,8 +83,20 @@ Static version of the MinGW Windows expat XML parser library. %prep -%setup -q -n expat-%{version} -%patch1 -p2 -b .cve20843 +%setup -q -n libexpat-R_%{unversion}/expat +pushd .. +%patch -P0 -p1 -b .CVE-2023-52425 +%patch -P1 -p1 -b .CVE-2024-28757 +%patch -P2 -p1 -b .CVE-2024-45490 +%patch -P3 -p1 -b .CVE-2024-45491 +%patch -P4 -p1 -b .CVE-2024-45492 +%patch -P5 -p1 -b .CVE-2024-50602 +%patch -P6 -p1 -b .CVE-2024-8176 +%patch -P7 -p1 -b .CVE-2025-59375 +popd + +sed -i 's/install-data-hook/do-nothing-please/' lib/Makefile.am +autoreconf -fi %build %mingw_configure @@ -87,6 +109,10 @@ Static version of the MinGW Windows expat XML parser library. # Remove .la files find $RPM_BUILD_ROOT -name "*.la" -delete +# Remove xmlwf.exe (should not be packaged) +rm -f $RPM_BUILD_ROOT%{mingw32_bindir}/xmlwf.exe +rm -f $RPM_BUILD_ROOT%{mingw64_bindir}/xmlwf.exe + # Remove documentation which duplicates that found in the native package. 
rm -r $RPM_BUILD_ROOT%{mingw32_docdir} rm -r $RPM_BUILD_ROOT%{mingw32_mandir}/man1 @@ -97,12 +123,13 @@ rm -r $RPM_BUILD_ROOT%{mingw64_mandir}/man1 %files -n mingw32-expat %license COPYING %{mingw32_bindir}/libexpat-1.dll -%{mingw32_bindir}/xmlwf.exe %{mingw32_libdir}/libexpat.dll.a %{mingw32_libdir}/pkgconfig/expat.pc %{mingw32_includedir}/expat.h %{mingw32_includedir}/expat_config.h %{mingw32_includedir}/expat_external.h +# Add CMake config files +%{mingw32_libdir}/cmake/expat-%{version}/* %files -n mingw32-expat-static %{mingw32_libdir}/libexpat.a @@ -111,18 +138,27 @@ rm -r $RPM_BUILD_ROOT%{mingw64_mandir}/man1 %files -n mingw64-expat %license COPYING %{mingw64_bindir}/libexpat-1.dll -%{mingw64_bindir}/xmlwf.exe %{mingw64_libdir}/libexpat.dll.a %{mingw64_libdir}/pkgconfig/expat.pc %{mingw64_includedir}/expat.h %{mingw64_includedir}/expat_config.h %{mingw64_includedir}/expat_external.h +# Add CMake config files +%{mingw64_libdir}/cmake/expat-%{version}/* %files -n mingw64-expat-static %{mingw64_libdir}/libexpat.a %changelog +* Thu Nov 20 2025 Lili Zhu - 2.5.0-1 +- Rebase to version 2.5.0 +- Fix the following CVEs + CVE-2023-52425 CVE-2024-28757 CVE-2024-45490 + CVE-2024-45491 CVE-2024-45492 CVE-2024-50602 + CVE-2024-8176 CVE-2025-59375 +- Resolves: RHEL-114628 + * Wed Jun 10 2020 Uri Lublin - 2.2.4-5 - Rebuild - Resolves: rhbz#1773899 diff --git a/sources b/sources index d698994..30d98c8 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (expat-2.2.4.tar.bz2) = 788249e2c6edf8521c4a99830fd5e51a55c062b834516f6775759cd71accf6375f12d3bd38f8b069777d081a4380b9549049921386ca0cb4b9b9daa4861d6592 +SHA512 (expat-2.5.0.tar.gz) = 779f0d0f3f2d8b33db0fd044864ab5ab1a40f20501f792fe90ad0d18de536c4765c3749f120e21fec11a0e6c89af1dc576d1fe261c871ca44a594f7b61fd1d9e