1467 lines
58 KiB
Diff
1467 lines
58 KiB
Diff
|
commit 678a2f7efcaaa977886e055613f2332615aef82c
|
||
|
Author: Tomas Korbar <tkorbar@redhat.com>
|
||
|
Date: Tue Feb 13 13:52:28 2024 +0100
|
||
|
|
||
|
Fix CVE-2023-52425
|
||
|
|
||
|
diff --git a/expat/Makefile.am b/expat/Makefile.am
|
||
|
index 37ae373..cd0117f 100644
|
||
|
--- a/expat/Makefile.am
|
||
|
+++ b/expat/Makefile.am
|
||
|
@@ -131,6 +131,11 @@ buildlib:
|
||
|
run-benchmark:
|
||
|
$(MAKE) -C tests/benchmark
|
||
|
./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3
|
||
|
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_attr.xml 4096 3
|
||
|
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_cdata.xml 4096 3
|
||
|
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_comment.xml 4096 3
|
||
|
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_tag.xml 4096 3
|
||
|
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_text.xml 4096 3
|
||
|
|
||
|
.PHONY: download-xmlts-zip
|
||
|
download-xmlts-zip:
|
||
|
diff --git a/expat/doc/reference.html b/expat/doc/reference.html
|
||
|
index 8b0d47d..a10f3cb 100644
|
||
|
--- a/expat/doc/reference.html
|
||
|
+++ b/expat/doc/reference.html
|
||
|
@@ -151,10 +151,11 @@ interface.</p>
|
||
|
</ul>
|
||
|
</li>
|
||
|
<li>
|
||
|
- <a href="#billion-laughs">Billion Laughs Attack Protection</a>
|
||
|
+ <a href="#attack-protection">Attack Protection</a>
|
||
|
<ul>
|
||
|
<li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
|
||
|
<li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
|
||
|
+ <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
|
||
|
</ul>
|
||
|
</li>
|
||
|
<li><a href="#miscellaneous">Miscellaneous Functions</a>
|
||
|
@@ -2096,11 +2097,7 @@ parse position may be before the beginning of the buffer.</p>
|
||
|
return NULL.</p>
|
||
|
</div>
|
||
|
|
||
|
-<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3>
|
||
|
-
|
||
|
-<p>The functions in this section configure the built-in
|
||
|
- protection against various forms of
|
||
|
- <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p>
|
||
|
+<h3><a name="attack-protection">Attack Protection</a><a name="billion-laughs"></a></h3>
|
||
|
|
||
|
<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4>
|
||
|
<pre class="fcndec">
|
||
|
@@ -2188,6 +2185,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
|
||
|
</p>
|
||
|
</div>
|
||
|
|
||
|
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
|
||
|
+<pre class="fcndec">
|
||
|
+/* Added in Expat 2.6.0. */
|
||
|
+XML_Bool XMLCALL
|
||
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
||
|
+</pre>
|
||
|
+<div class="fcndef">
|
||
|
+ <p>
|
||
|
+ Large tokens may require many parse calls before enough data is available for Expat to parse it in full.
|
||
|
+ If Expat retried parsing the token on every parse call, parsing could take quadratic time.
|
||
|
+ To avoid this, Expat only retries once a significant amount of new data is available.
|
||
|
+ This function allows disabling this behavior.
|
||
|
+ </p>
|
||
|
+ <p>
|
||
|
+ The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
|
||
|
+ </p>
|
||
|
+ <p>
|
||
|
+ Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
|
||
|
+ </p>
|
||
|
+</div>
|
||
|
+
|
||
|
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
|
||
|
|
||
|
<p>The functions in this section either obtain state information from
|
||
|
diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml
|
||
|
index 9603abf..3d35393 100644
|
||
|
--- a/expat/doc/xmlwf.xml
|
||
|
+++ b/expat/doc/xmlwf.xml
|
||
|
@@ -313,6 +313,16 @@ supports both.
|
||
|
</listitem>
|
||
|
</varlistentry>
|
||
|
|
||
|
+ <varlistentry>
|
||
|
+ <term><option>-q</option></term>
|
||
|
+ <listitem>
|
||
|
+ <para>
|
||
|
+ Disable reparse deferral, and allow quadratic parse runtime
|
||
|
+ on large tokens (default: reparse deferral enabled).
|
||
|
+ </para>
|
||
|
+ </listitem>
|
||
|
+ </varlistentry>
|
||
|
+
|
||
|
<varlistentry>
|
||
|
<term><option>-r</option></term>
|
||
|
<listitem>
|
||
|
diff --git a/expat/lib/expat.h b/expat/lib/expat.h
|
||
|
index 1c83563..842dd70 100644
|
||
|
--- a/expat/lib/expat.h
|
||
|
+++ b/expat/lib/expat.h
|
||
|
@@ -16,6 +16,7 @@
|
||
|
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
|
||
|
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
|
||
|
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
|
||
|
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||
|
Licensed under the MIT license:
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||
|
@@ -1050,6 +1051,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
||
|
XML_Parser parser, unsigned long long activationThresholdBytes);
|
||
|
#endif
|
||
|
|
||
|
+/* Added in Expat 2.6.0. */
|
||
|
+XMLPARSEAPI(XML_Bool)
|
||
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
||
|
+
|
||
|
/* Expat follows the semantic versioning convention.
|
||
|
See http://semver.org.
|
||
|
*/
|
||
|
diff --git a/expat/lib/internal.h b/expat/lib/internal.h
|
||
|
index e09f533..e2709c8 100644
|
||
|
--- a/expat/lib/internal.h
|
||
|
+++ b/expat/lib/internal.h
|
||
|
@@ -31,6 +31,7 @@
|
||
|
Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
|
||
|
Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
|
||
|
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
|
||
|
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||
|
Licensed under the MIT license:
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||
|
@@ -160,6 +161,9 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
|
||
|
const char *unsignedCharToPrintable(unsigned char c);
|
||
|
#endif
|
||
|
|
||
|
+extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
|
||
|
+extern unsigned int g_parseAttempts; // used for testing only
|
||
|
+
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif
|
||
|
diff --git a/expat/lib/libexpat.def.cmake b/expat/lib/libexpat.def.cmake
|
||
|
index cf434a2..3ff4d55 100644
|
||
|
--- a/expat/lib/libexpat.def.cmake
|
||
|
+++ b/expat/lib/libexpat.def.cmake
|
||
|
@@ -77,3 +77,4 @@ EXPORTS
|
||
|
; added with version 2.4.0
|
||
|
@_EXPAT_COMMENT_DTD@ XML_SetBillionLaughsAttackProtectionActivationThreshold @69
|
||
|
@_EXPAT_COMMENT_DTD@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
|
||
|
+XML_SetReparseDeferralEnabled @71
|
||
|
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
|
||
|
index b6c2eca..2ae64e9 100644
|
||
|
--- a/expat/lib/xmlparse.c
|
||
|
+++ b/expat/lib/xmlparse.c
|
||
|
@@ -73,6 +73,7 @@
|
||
|
# endif
|
||
|
#endif
|
||
|
|
||
|
+#include <stdbool.h>
|
||
|
#include <stddef.h>
|
||
|
#include <string.h> /* memset(), memcpy() */
|
||
|
#include <assert.h>
|
||
|
@@ -196,6 +197,8 @@ typedef char ICHAR;
|
||
|
/* Do safe (NULL-aware) pointer arithmetic */
|
||
|
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
|
||
|
|
||
|
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||
|
+
|
||
|
#include "internal.h"
|
||
|
#include "xmltok.h"
|
||
|
#include "xmlrole.h"
|
||
|
@@ -602,6 +605,9 @@ static unsigned long getDebugLevel(const char *variableName,
|
||
|
? 0 \
|
||
|
: ((*((pool)->ptr)++ = c), 1))
|
||
|
|
||
|
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
|
||
|
+unsigned int g_parseAttempts = 0; // used for testing only
|
||
|
+
|
||
|
struct XML_ParserStruct {
|
||
|
/* The first member must be m_userData so that the XML_GetUserData
|
||
|
macro works. */
|
||
|
@@ -617,6 +623,9 @@ struct XML_ParserStruct {
|
||
|
const char *m_bufferLim;
|
||
|
XML_Index m_parseEndByteIndex;
|
||
|
const char *m_parseEndPtr;
|
||
|
+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
|
||
|
+ XML_Bool m_reparseDeferralEnabled;
|
||
|
+ int m_lastBufferRequestSize;
|
||
|
XML_Char *m_dataBuf;
|
||
|
XML_Char *m_dataBufEnd;
|
||
|
XML_StartElementHandler m_startElementHandler;
|
||
|
@@ -948,6 +957,47 @@ get_hash_secret_salt(XML_Parser parser) {
|
||
|
return parser->m_hash_secret_salt;
|
||
|
}
|
||
|
|
||
|
+static enum XML_Error /* calls m_processor, unless the reparse is deferred */
|
||
|
+callProcessor(XML_Parser parser, const char *start, const char *end,
|
||
|
+ const char **endPtr) {
|
||
|
+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
|
||
|
+
|
||
|
+ if (parser->m_reparseDeferralEnabled
|
||
|
+ && ! parser->m_parsingStatus.finalBuffer) {
|
||
|
+ // Heuristic: don't try to parse a partial token again until the amount of
|
||
|
+ // available data has at least doubled since the last fruitless attempt.
|
||
|
+ const size_t had_before = parser->m_partialTokenBytesBefore;
|
||
|
+ // ...but *do* try anyway if the last requested size exceeds the room left.
|
||
|
+ size_t available_buffer
|
||
|
+ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); // before m_bufferPtr
|
||
|
+#if XML_CONTEXT_BYTES > 0
|
||
|
+ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); // clamped, no underflow
|
||
|
+#endif
|
||
|
+ available_buffer
|
||
|
+ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); // after m_bufferEnd
|
||
|
+ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
|
||
|
+ const bool enough
|
||
|
+ = (have_now >= 2 * had_before)
|
||
|
+ || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
|
||
|
+
|
||
|
+ if (! enough) {
|
||
|
+ *endPtr = start; // callers may expect this to be set
|
||
|
+ return XML_ERROR_NONE;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ g_parseAttempts += 1; // counts only calls that actually reach m_processor
|
||
|
+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
|
||
|
+ if (ret == XML_ERROR_NONE) {
|
||
|
+ // if we consumed nothing, remember what we had on this parse attempt;
|
||
|
+ if (*endPtr == start) {
|
||
|
+ parser->m_partialTokenBytesBefore = have_now;
|
||
|
+ } else {
|
||
|
+ parser->m_partialTokenBytesBefore = 0; // progress was made: reset the baseline
|
||
|
+ }
|
||
|
+ }
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
static XML_Bool /* only valid for root parser */
|
||
|
startParsing(XML_Parser parser) {
|
||
|
/* hash functions must be initialized before setContext() is called */
|
||
|
@@ -1129,6 +1179,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
|
||
|
parser->m_bufferEnd = parser->m_buffer;
|
||
|
parser->m_parseEndByteIndex = 0;
|
||
|
parser->m_parseEndPtr = NULL;
|
||
|
+ parser->m_partialTokenBytesBefore = 0;
|
||
|
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
|
||
|
+ parser->m_lastBufferRequestSize = 0;
|
||
|
parser->m_declElementType = NULL;
|
||
|
parser->m_declAttributeId = NULL;
|
||
|
parser->m_declEntity = NULL;
|
||
|
@@ -1298,6 +1351,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
|
||
|
to worry which hash secrets each table has.
|
||
|
*/
|
||
|
unsigned long oldhash_secret_salt;
|
||
|
+ XML_Bool oldReparseDeferralEnabled;
|
||
|
|
||
|
/* Validate the oldParser parameter before we pull everything out of it */
|
||
|
if (oldParser == NULL)
|
||
|
@@ -1342,6 +1396,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
|
||
|
to worry which hash secrets each table has.
|
||
|
*/
|
||
|
oldhash_secret_salt = parser->m_hash_secret_salt;
|
||
|
+ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
|
||
|
|
||
|
#ifdef XML_DTD
|
||
|
if (! context)
|
||
|
@@ -1394,6 +1449,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
|
||
|
parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
|
||
|
parser->m_ns_triplets = oldns_triplets;
|
||
|
parser->m_hash_secret_salt = oldhash_secret_salt;
|
||
|
+ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
|
||
|
parser->m_parentParser = oldParser;
|
||
|
#ifdef XML_DTD
|
||
|
parser->m_paramEntityParsing = oldParamEntityParsing;
|
||
|
@@ -1848,55 +1904,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
|
||
|
parser->m_parsingStatus.parsing = XML_PARSING;
|
||
|
}
|
||
|
|
||
|
- if (len == 0) {
|
||
|
- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
|
||
|
- if (! isFinal)
|
||
|
- return XML_STATUS_OK;
|
||
|
- parser->m_positionPtr = parser->m_bufferPtr;
|
||
|
- parser->m_parseEndPtr = parser->m_bufferEnd;
|
||
|
-
|
||
|
- /* If data are left over from last buffer, and we now know that these
|
||
|
- data are the final chunk of input, then we have to check them again
|
||
|
- to detect errors based on that fact.
|
||
|
- */
|
||
|
- parser->m_errorCode
|
||
|
- = parser->m_processor(parser, parser->m_bufferPtr,
|
||
|
- parser->m_parseEndPtr, &parser->m_bufferPtr);
|
||
|
-
|
||
|
- if (parser->m_errorCode == XML_ERROR_NONE) {
|
||
|
- switch (parser->m_parsingStatus.parsing) {
|
||
|
- case XML_SUSPENDED:
|
||
|
- /* It is hard to be certain, but it seems that this case
|
||
|
- * cannot occur. This code is cleaning up a previous parse
|
||
|
- * with no new data (since len == 0). Changing the parsing
|
||
|
- * state requires getting to execute a handler function, and
|
||
|
- * there doesn't seem to be an opportunity for that while in
|
||
|
- * this circumstance.
|
||
|
- *
|
||
|
- * Given the uncertainty, we retain the code but exclude it
|
||
|
- * from coverage tests.
|
||
|
- *
|
||
|
- * LCOV_EXCL_START
|
||
|
- */
|
||
|
- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
|
||
|
- parser->m_bufferPtr, &parser->m_position);
|
||
|
- parser->m_positionPtr = parser->m_bufferPtr;
|
||
|
- return XML_STATUS_SUSPENDED;
|
||
|
- /* LCOV_EXCL_STOP */
|
||
|
- case XML_INITIALIZED:
|
||
|
- case XML_PARSING:
|
||
|
- parser->m_parsingStatus.parsing = XML_FINISHED;
|
||
|
- /* fall through */
|
||
|
- default:
|
||
|
- return XML_STATUS_OK;
|
||
|
- }
|
||
|
- }
|
||
|
- parser->m_eventEndPtr = parser->m_eventPtr;
|
||
|
- parser->m_processor = errorProcessor;
|
||
|
- return XML_STATUS_ERROR;
|
||
|
- }
|
||
|
#ifndef XML_CONTEXT_BYTES
|
||
|
- else if (parser->m_bufferPtr == parser->m_bufferEnd) {
|
||
|
+ if (parser->m_bufferPtr == parser->m_bufferEnd) {
|
||
|
const char *end;
|
||
|
int nLeftOver;
|
||
|
enum XML_Status result;
|
||
|
@@ -1907,12 +1916,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
|
||
|
parser->m_processor = errorProcessor;
|
||
|
return XML_STATUS_ERROR;
|
||
|
}
|
||
|
+ // though this isn't a buffer request, we assume that `len` is the app's
|
||
|
+ // preferred buffer fill size, and therefore save it here.
|
||
|
+ parser->m_lastBufferRequestSize = len;
|
||
|
parser->m_parseEndByteIndex += len;
|
||
|
parser->m_positionPtr = s;
|
||
|
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
|
||
|
|
||
|
parser->m_errorCode
|
||
|
- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
|
||
|
+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
|
||
|
|
||
|
if (parser->m_errorCode != XML_ERROR_NONE) {
|
||
|
parser->m_eventEndPtr = parser->m_eventPtr;
|
||
|
@@ -1939,23 +1951,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
|
||
|
&parser->m_position);
|
||
|
nLeftOver = s + len - end;
|
||
|
if (nLeftOver) {
|
||
|
- if (parser->m_buffer == NULL
|
||
|
- || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
|
||
|
- /* avoid _signed_ integer overflow */
|
||
|
- char *temp = NULL;
|
||
|
- const int bytesToAllocate = (int)((unsigned)len * 2U);
|
||
|
- if (bytesToAllocate > 0) {
|
||
|
- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
|
||
|
- }
|
||
|
- if (temp == NULL) {
|
||
|
- parser->m_errorCode = XML_ERROR_NO_MEMORY;
|
||
|
- parser->m_eventPtr = parser->m_eventEndPtr = NULL;
|
||
|
- parser->m_processor = errorProcessor;
|
||
|
- return XML_STATUS_ERROR;
|
||
|
- }
|
||
|
- parser->m_buffer = temp;
|
||
|
- parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
|
||
|
+ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
|
||
|
+ // (and XML_ERROR_FINISHED) from XML_GetBuffer.
|
||
|
+ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
|
||
|
+ parser->m_parsingStatus.parsing = XML_PARSING;
|
||
|
+ void *const temp = XML_GetBuffer(parser, nLeftOver);
|
||
|
+ parser->m_parsingStatus.parsing = originalStatus;
|
||
|
+ // GetBuffer may have overwritten this, but we want to remember what the
|
||
|
+ // app requested, not how many bytes were left over after parsing.
|
||
|
+ parser->m_lastBufferRequestSize = len;
|
||
|
+ if (temp == NULL) {
|
||
|
+ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
|
||
|
+ parser->m_eventPtr = parser->m_eventEndPtr = NULL;
|
||
|
+ parser->m_processor = errorProcessor;
|
||
|
+ return XML_STATUS_ERROR;
|
||
|
}
|
||
|
+ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
|
||
|
+ // don't have any data to preserve, and can copy straight into the start
|
||
|
+ // of the buffer rather than the GetBuffer return pointer (which may be
|
||
|
+ // pointing further into the allocated buffer).
|
||
|
memcpy(parser->m_buffer, end, nLeftOver);
|
||
|
}
|
||
|
parser->m_bufferPtr = parser->m_buffer;
|
||
|
@@ -1967,15 +1981,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
|
||
|
return result;
|
||
|
}
|
||
|
#endif /* not defined XML_CONTEXT_BYTES */
|
||
|
- else {
|
||
|
- void *buff = XML_GetBuffer(parser, len);
|
||
|
- if (buff == NULL)
|
||
|
- return XML_STATUS_ERROR;
|
||
|
- else {
|
||
|
- memcpy(buff, s, len);
|
||
|
- return XML_ParseBuffer(parser, len, isFinal);
|
||
|
- }
|
||
|
+ void *buff = XML_GetBuffer(parser, len);
|
||
|
+ if (buff == NULL)
|
||
|
+ return XML_STATUS_ERROR;
|
||
|
+ if (len > 0) {
|
||
|
+ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
|
||
|
+ memcpy(buff, s, len);
|
||
|
}
|
||
|
+ return XML_ParseBuffer(parser, len, isFinal);
|
||
|
}
|
||
|
|
||
|
enum XML_Status XMLCALL
|
||
|
@@ -2015,8 +2028,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
|
||
|
parser->m_parseEndByteIndex += len;
|
||
|
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
|
||
|
|
||
|
- parser->m_errorCode = parser->m_processor(
|
||
|
- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
|
||
|
+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
|
||
|
+ &parser->m_bufferPtr);
|
||
|
|
||
|
if (parser->m_errorCode != XML_ERROR_NONE) {
|
||
|
parser->m_eventEndPtr = parser->m_eventPtr;
|
||
|
@@ -2061,10 +2074,14 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
default:;
|
||
|
}
|
||
|
|
||
|
- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
|
||
|
-#ifdef XML_CONTEXT_BYTES
|
||
|
+ // whether or not the request succeeds, `len` seems to be the app's preferred
|
||
|
+ // buffer fill size; remember it.
|
||
|
+ parser->m_lastBufferRequestSize = len;
|
||
|
+ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
|
||
|
+ || parser->m_buffer == NULL) {
|
||
|
+#if XML_CONTEXT_BYTES > 0
|
||
|
int keep;
|
||
|
-#endif /* defined XML_CONTEXT_BYTES */
|
||
|
+#endif /* XML_CONTEXT_BYTES > 0 */
|
||
|
/* Do not invoke signed arithmetic overflow: */
|
||
|
int neededSize = (int)((unsigned)len
|
||
|
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
|
||
|
@@ -2073,7 +2090,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
parser->m_errorCode = XML_ERROR_NO_MEMORY;
|
||
|
return NULL;
|
||
|
}
|
||
|
-#ifdef XML_CONTEXT_BYTES
|
||
|
+#if XML_CONTEXT_BYTES > 0
|
||
|
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
|
||
|
if (keep > XML_CONTEXT_BYTES)
|
||
|
keep = XML_CONTEXT_BYTES;
|
||
|
@@ -2083,10 +2100,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
return NULL;
|
||
|
}
|
||
|
neededSize += keep;
|
||
|
-#endif /* defined XML_CONTEXT_BYTES */
|
||
|
- if (neededSize
|
||
|
- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
|
||
|
-#ifdef XML_CONTEXT_BYTES
|
||
|
+#endif /* XML_CONTEXT_BYTES > 0 */
|
||
|
+ if (parser->m_buffer && parser->m_bufferPtr
|
||
|
+ && neededSize
|
||
|
+ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
|
||
|
+#if XML_CONTEXT_BYTES > 0
|
||
|
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
|
||
|
int offset
|
||
|
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
|
||
|
@@ -2099,19 +2117,17 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
parser->m_bufferPtr -= offset;
|
||
|
}
|
||
|
#else
|
||
|
- if (parser->m_buffer && parser->m_bufferPtr) {
|
||
|
- memmove(parser->m_buffer, parser->m_bufferPtr,
|
||
|
- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
|
||
|
- parser->m_bufferEnd
|
||
|
- = parser->m_buffer
|
||
|
- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
|
||
|
- parser->m_bufferPtr = parser->m_buffer;
|
||
|
- }
|
||
|
-#endif /* not defined XML_CONTEXT_BYTES */
|
||
|
+ memmove(parser->m_buffer, parser->m_bufferPtr,
|
||
|
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
|
||
|
+ parser->m_bufferEnd
|
||
|
+ = parser->m_buffer
|
||
|
+ + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
|
||
|
+ parser->m_bufferPtr = parser->m_buffer;
|
||
|
+#endif /* XML_CONTEXT_BYTES > 0 */
|
||
|
} else {
|
||
|
char *newBuf;
|
||
|
int bufferSize
|
||
|
- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
|
||
|
+ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
|
||
|
if (bufferSize == 0)
|
||
|
bufferSize = INIT_BUFFER_SIZE;
|
||
|
do {
|
||
|
@@ -2128,7 +2144,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
return NULL;
|
||
|
}
|
||
|
parser->m_bufferLim = newBuf + bufferSize;
|
||
|
-#ifdef XML_CONTEXT_BYTES
|
||
|
+#if XML_CONTEXT_BYTES > 0
|
||
|
if (parser->m_bufferPtr) {
|
||
|
memcpy(newBuf, &parser->m_bufferPtr[-keep],
|
||
|
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
|
||
|
@@ -2158,7 +2174,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
|
||
|
parser->m_bufferEnd = newBuf;
|
||
|
}
|
||
|
parser->m_bufferPtr = parser->m_buffer = newBuf;
|
||
|
-#endif /* not defined XML_CONTEXT_BYTES */
|
||
|
+#endif /* XML_CONTEXT_BYTES > 0 */
|
||
|
}
|
||
|
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
|
||
|
parser->m_positionPtr = NULL;
|
||
|
@@ -2208,7 +2224,7 @@ XML_ResumeParser(XML_Parser parser) {
|
||
|
}
|
||
|
parser->m_parsingStatus.parsing = XML_PARSING;
|
||
|
|
||
|
- parser->m_errorCode = parser->m_processor(
|
||
|
+ parser->m_errorCode = callProcessor(
|
||
|
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
|
||
|
|
||
|
if (parser->m_errorCode != XML_ERROR_NONE) {
|
||
|
@@ -2561,6 +2577,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
||
|
}
|
||
|
#endif /* XML_DTD */
|
||
|
|
||
|
+XML_Bool XMLCALL
|
||
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
|
||
|
+ if (parser == NULL || (enabled != XML_TRUE && enabled != XML_FALSE)) {
|
||
|
+ return XML_FALSE; // no parser, or a value that is neither XML_TRUE nor XML_FALSE
|
||
|
+ }
|
||
|
+ parser->m_reparseDeferralEnabled = enabled; // takes effect for this parser only
|
||
|
+ return XML_TRUE;
|
||
|
+}
|
||
|
+
|
||
|
/* Initially tag->rawName always points into the parse buffer;
|
||
|
for those TAG instances opened while the current parse buffer was
|
||
|
processed, and not yet closed, we need to store tag->rawName in a more
|
||
|
@@ -4482,15 +4507,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
||
|
parser->m_processor = entityValueProcessor;
|
||
|
return entityValueProcessor(parser, next, end, nextPtr);
|
||
|
}
|
||
|
- /* If we are at the end of the buffer, this would cause XmlPrologTok to
|
||
|
- return XML_TOK_NONE on the next call, which would then cause the
|
||
|
- function to exit with *nextPtr set to s - that is what we want for other
|
||
|
- tokens, but not for the BOM - we would rather like to skip it;
|
||
|
- then, when this routine is entered the next time, XmlPrologTok will
|
||
|
- return XML_TOK_INVALID, since the BOM is still in the buffer
|
||
|
+ /* XmlPrologTok has now set the encoding based on the BOM it found, and we
|
||
|
+ must move s and nextPtr forward to consume the BOM.
|
||
|
+
|
||
|
+ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
|
||
|
+ would leave the BOM in the buffer and return. On the next call to this
|
||
|
+ function, our XmlPrologTok call would return XML_TOK_INVALID, since it
|
||
|
+ is not valid to have multiple BOMs.
|
||
|
*/
|
||
|
- else if (tok == XML_TOK_BOM && next == end
|
||
|
- && ! parser->m_parsingStatus.finalBuffer) {
|
||
|
+ else if (tok == XML_TOK_BOM) {
|
||
|
# ifdef XML_DTD
|
||
|
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
|
||
|
XML_ACCOUNT_DIRECT)) {
|
||
|
@@ -4500,7 +4525,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
||
|
# endif
|
||
|
|
||
|
*nextPtr = next;
|
||
|
- return XML_ERROR_NONE;
|
||
|
+ s = next;
|
||
|
}
|
||
|
/* If we get this token, we have the start of what might be a
|
||
|
normal tag, but not a declaration (i.e. it doesn't begin with
|
||
|
diff --git a/expat/tests/minicheck.c b/expat/tests/minicheck.c
|
||
|
index 1c65748..f383380 100644
|
||
|
--- a/expat/tests/minicheck.c
|
||
|
+++ b/expat/tests/minicheck.c
|
||
|
@@ -208,6 +208,21 @@ srunner_run_all(SRunner *runner, int verbosity) {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+void
|
||
|
+_fail(const char *file, int line, const char *msg) {
|
||
|
+ /* Unconditional test failure: record file/line as the current check
|
||
|
+ location, always print msg (if not NULL) to stderr, newline-terminated,
|
||
|
+ then leave via longjmp(env, 1). An empty msg must not index msg[-1].
|
||
|
+ */
|
||
|
+ _check_current_filename = file;
|
||
|
+ _check_current_lineno = line;
|
||
|
+ if (msg != NULL) {
|
||
|
+ const int has_newline = (*msg != '\0' && msg[strlen(msg) - 1] == '\n');
|
||
|
+ fprintf(stderr, "ERROR: %s%s", msg, has_newline ? "" : "\n");
|
||
|
+ }
|
||
|
+ longjmp(env, 1);
|
||
|
+}
|
||
|
+
|
||
|
void
|
||
|
_fail_unless(int condition, const char *file, int line, const char *msg) {
|
||
|
/* Always print the error message so it isn't lost. In this case,
|
||
|
diff --git a/expat/tests/minicheck.h b/expat/tests/minicheck.h
|
||
|
index cc1f835..032b54e 100644
|
||
|
--- a/expat/tests/minicheck.h
|
||
|
+++ b/expat/tests/minicheck.h
|
||
|
@@ -64,7 +64,14 @@ extern "C" {
|
||
|
} \
|
||
|
}
|
||
|
|
||
|
-#define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg)
|
||
|
+
|
||
|
+# define fail(msg) _fail(__FILE__, __LINE__, msg)
|
||
|
+# define assert_true(cond) \
|
||
|
+ do { \
|
||
|
+ if (! (cond)) { \
|
||
|
+ _fail(__FILE__, __LINE__, "check failed: " #cond); \
|
||
|
+ } \
|
||
|
+ } while (0)
|
||
|
|
||
|
typedef void (*tcase_setup_function)(void);
|
||
|
typedef void (*tcase_teardown_function)(void);
|
||
|
@@ -103,6 +110,11 @@ void _check_set_test_info(char const *function, char const *filename,
|
||
|
* Prototypes for the actual implementation.
|
||
|
*/
|
||
|
|
||
|
+# if defined(__GNUC__)
|
||
|
+__attribute__((noreturn))
|
||
|
+# endif
|
||
|
+void
|
||
|
+_fail(const char *file, int line, const char *msg);
|
||
|
void _fail_unless(int condition, const char *file, int line, const char *msg);
|
||
|
Suite *suite_create(const char *name);
|
||
|
TCase *tcase_create(const char *name);
|
||
|
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c
|
||
|
index 915fa52..941f61d 100644
|
||
|
--- a/expat/tests/runtests.c
|
||
|
+++ b/expat/tests/runtests.c
|
||
|
@@ -54,6 +54,7 @@
|
||
|
#include <ctype.h>
|
||
|
#include <limits.h>
|
||
|
#include <stdint.h> /* intptr_t uint64_t */
|
||
|
+#include <time.h>
|
||
|
|
||
|
#if ! defined(__cplusplus)
|
||
|
# include <stdbool.h>
|
||
|
@@ -1071,7 +1072,7 @@ START_TEST(test_column_number_after_parse) {
|
||
|
const char *text = "<tag></tag>";
|
||
|
XML_Size colno;
|
||
|
|
||
|
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
|
||
|
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
|
||
|
== XML_STATUS_ERROR)
|
||
|
xml_failure(g_parser);
|
||
|
colno = XML_GetCurrentColumnNumber(g_parser);
|
||
|
@@ -2582,7 +2583,7 @@ START_TEST(test_default_current) {
|
||
|
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
|
||
|
== XML_STATUS_ERROR)
|
||
|
xml_failure(g_parser);
|
||
|
- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD"));
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("DCDCDCDD"));
|
||
|
|
||
|
/* Again, without the defaulting */
|
||
|
XML_ParserReset(g_parser, NULL);
|
||
|
@@ -2593,7 +2594,7 @@ START_TEST(test_default_current) {
|
||
|
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
|
||
|
== XML_STATUS_ERROR)
|
||
|
xml_failure(g_parser);
|
||
|
- CharData_CheckXMLChars(&storage, XCS("DcccccD"));
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("DcccD"));
|
||
|
|
||
|
/* Now with an internal entity to complicate matters */
|
||
|
XML_ParserReset(g_parser, NULL);
|
||
|
@@ -3946,6 +3947,19 @@ START_TEST(test_get_buffer_3_overflow) {
|
||
|
END_TEST
|
||
|
#endif // defined(XML_CONTEXT_BYTES)
|
||
|
|
||
|
+START_TEST(test_getbuffer_allocates_on_zero_len) { // XML_GetBuffer(p, 0) must not be NULL
|
||
|
+ for (int first_len = 1; first_len >= 0; first_len--) { // first request: 1 byte, then 0 bytes
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ assert_true(XML_GetBuffer(parser, first_len) != NULL);
|
||
|
+ assert_true(XML_GetBuffer(parser, 0) != NULL); // zero-length request still yields a buffer
|
||
|
+ if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) // empty, non-final parse must succeed
|
||
|
+ xml_failure(parser);
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
/* Test position information macros */
|
||
|
START_TEST(test_byte_info_at_end) {
|
||
|
const char *text = "<doc></doc>";
|
||
|
@@ -6205,6 +6219,12 @@ START_TEST(test_utf8_in_start_tags) {
|
||
|
char doc[1024];
|
||
|
size_t failCount = 0;
|
||
|
|
||
|
+ // we need all the bytes to be parsed, but we don't want the errors that can
|
||
|
+ // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
|
||
|
+ if (g_reparseDeferralEnabledDefault) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||
|
size_t j = 0;
|
||
|
for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
|
||
|
@@ -6830,6 +6850,613 @@ START_TEST(test_nested_entity_suspend) {
|
||
|
}
|
||
|
END_TEST
|
||
|
|
||
|
+/* Regression test for quadratic parsing on large tokens */
|
||
|
+START_TEST(test_big_tokens_take_linear_time) {
|
||
|
+ const char *const too_slow_failure_message
|
||
|
+ = "Compared to the baseline runtime of the first test, this test has a "
|
||
|
+ "slowdown of more than <max_slowdown>. "
|
||
|
+ "Please keep increasing the value by 1 until it reliably passes the "
|
||
|
+ "test on your hardware and open a bug sharing that number with us. "
|
||
|
+ "Thanks in advance!";
|
||
|
+ const struct {
|
||
|
+ const char *pre;
|
||
|
+ const char *post;
|
||
|
+ } text[] = {
|
||
|
+ {"<a>", "</a>"}, // assumed good, used as baseline
|
||
|
+ {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
|
||
|
+ {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
|
||
|
+ {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
|
||
|
+ {"<e><", "/></e>"}, // big elem name, used to be O(N²)
|
||
|
+ };
|
||
|
+ const int num_cases = sizeof(text) / sizeof(text[0]);
|
||
|
+ // For the test we need a <max_slowdown> value that is:
|
||
|
+ // (1) big enough that the test passes reliably (avoiding flaky tests), and
|
||
|
+ // (2) small enough that the test actually catches regressions.
|
||
|
+ const int max_slowdown = 15;
|
||
|
+ char aaaaaa[4096];
|
||
|
+ const int fillsize = (int)sizeof(aaaaaa);
|
||
|
+ const int fillcount = 100;
|
||
|
+
|
||
|
+ memset(aaaaaa, 'a', fillsize);
|
||
|
+
|
||
|
+ if (! g_reparseDeferralEnabledDefault) {
|
||
|
+ return; // heuristic is disabled; we would get O(n^2) and fail.
|
||
|
+ }
|
||
|
+#if defined(_WIN32)
|
||
|
+ if (CLOCKS_PER_SEC < 100000) {
|
||
|
+ // Skip this test if clock() doesn't have reasonably good resolution.
|
||
|
+ // This workaround is only applied to Windows targets, since XSI requires
|
||
|
+ // the value to be 1 000 000 (10x the condition here), and we want to be
|
||
|
+ // very sure that at least one platform in CI can catch regressions.
|
||
|
+ return;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+
|
||
|
+ clock_t baseline = 0;
|
||
|
+ for (int i = 0; i < num_cases; ++i) {
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ enum XML_Status status;
|
||
|
+ const clock_t start = clock();
|
||
|
+
|
||
|
+ // parse the start text
|
||
|
+ status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
|
||
|
+ (int)strlen(text[i].pre), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ // parse lots of 'a', failing the test early if it takes too long
|
||
|
+ for (int f = 0; f < fillcount; ++f) {
|
||
|
+ status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ // i == 0 means we're still calculating the baseline value
|
||
|
+ if (i > 0) {
|
||
|
+ const clock_t now = clock();
|
||
|
+ const clock_t clocks_so_far = now - start;
|
||
|
+ const int slowdown = clocks_so_far / baseline;
|
||
|
+ if (slowdown >= max_slowdown) {
|
||
|
+ fprintf(
|
||
|
+ stderr,
|
||
|
+ "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
|
||
|
+ f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
|
||
|
+ fail(too_slow_failure_message);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ // parse the end text
|
||
|
+ status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
|
||
|
+ (int)strlen(text[i].post), XML_TRUE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+
|
||
|
+ // how long did it take in total?
|
||
|
+ const clock_t end = clock();
|
||
|
+ const clock_t taken = end - start;
|
||
|
+ if (i == 0) {
|
||
|
+ assert_true(taken > 0); // just to make sure we don't div-by-0 later
|
||
|
+ baseline = taken;
|
||
|
+ }
|
||
|
+ const int slowdown = taken / baseline;
|
||
|
+ if (slowdown >= max_slowdown) {
|
||
|
+ fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
|
||
|
+ (int)taken, (int)baseline, slowdown, max_slowdown);
|
||
|
+ fail(too_slow_failure_message);
|
||
|
+ }
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+START_TEST(test_set_reparse_deferral) {
|
||
|
+ const char *const pre = "<d>";
|
||
|
+ const char *const start = "<x attr='";
|
||
|
+ const char *const end = "'></x>";
|
||
|
+ char eeeeee[100];
|
||
|
+ const int fillsize = (int)sizeof(eeeeee);
|
||
|
+ memset(eeeeee, 'e', fillsize);
|
||
|
+
|
||
|
+ for (int enabled = 0; enabled <= 1; enabled += 1) {
|
||
|
+
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
|
||
|
+ // pre-grow the buffer to avoid reparsing due to almost-fullness
|
||
|
+ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
|
||
|
+
|
||
|
+ CharData storage;
|
||
|
+ CharData_Init(&storage);
|
||
|
+ XML_SetUserData(parser, &storage);
|
||
|
+ XML_SetStartElementHandler(parser, start_element_event_handler);
|
||
|
+
|
||
|
+ enum XML_Status status;
|
||
|
+ // parse the start text
|
||
|
+ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
|
||
|
+
|
||
|
+ // ..and the start of the token
|
||
|
+ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
|
||
|
+
|
||
|
+ // try to parse lots of 'e', but the token isn't finished
|
||
|
+ for (int c = 0; c < 100; ++c) {
|
||
|
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
|
||
|
+
|
||
|
+ // end the <x> token.
|
||
|
+ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (enabled) {
|
||
|
+ // In general, we may need to push more data to trigger a reparse attempt,
|
||
|
+ // but in this test, the data is constructed to always require it.
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
|
||
|
+ // 2x the token length should suffice; the +1 covers the start and end.
|
||
|
+ for (int c = 0; c < 101; ++c) {
|
||
|
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+struct element_decl_data {
|
||
|
+ XML_Parser parser;
|
||
|
+ int count;
|
||
|
+};
|
||
|
+
|
||
|
+static void
|
||
|
+element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
|
||
|
+ UNUSED_P(name);
|
||
|
+ struct element_decl_data *testdata = (struct element_decl_data *)userData;
|
||
|
+ testdata->count += 1;
|
||
|
+ XML_FreeContentModel(testdata->parser, model);
|
||
|
+}
|
||
|
+
|
||
|
+static int
|
||
|
+external_inherited_parser(XML_Parser p, const XML_Char *context,
|
||
|
+ const XML_Char *base, const XML_Char *systemId,
|
||
|
+ const XML_Char *publicId) {
|
||
|
+ UNUSED_P(base);
|
||
|
+ UNUSED_P(systemId);
|
||
|
+ UNUSED_P(publicId);
|
||
|
+ const char *const pre = "<!ELEMENT document ANY>\n";
|
||
|
+ const char *const start = "<!ELEMENT ";
|
||
|
+ const char *const end = " ANY>\n";
|
||
|
+ const char *const post = "<!ELEMENT xyz ANY>\n";
|
||
|
+ const int enabled = *(int *)XML_GetUserData(p);
|
||
|
+ char eeeeee[100];
|
||
|
+ char spaces[100];
|
||
|
+ const int fillsize = (int)sizeof(eeeeee);
|
||
|
+ assert_true(fillsize == (int)sizeof(spaces));
|
||
|
+ memset(eeeeee, 'e', fillsize);
|
||
|
+ memset(spaces, ' ', fillsize);
|
||
|
+
|
||
|
+ XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ // pre-grow the buffer to avoid reparsing due to almost-fullness
|
||
|
+ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
|
||
|
+
|
||
|
+ struct element_decl_data testdata;
|
||
|
+ testdata.parser = parser;
|
||
|
+ testdata.count = 0;
|
||
|
+ XML_SetUserData(parser, &testdata);
|
||
|
+ XML_SetElementDeclHandler(parser, element_decl_counter);
|
||
|
+
|
||
|
+ enum XML_Status status;
|
||
|
+ // parse the initial text
|
||
|
+ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ assert_true(testdata.count == 1); // first element should be done
|
||
|
+
|
||
|
+ // ..and the start of the big token
|
||
|
+ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ assert_true(testdata.count == 1); // still just the first one
|
||
|
+
|
||
|
+ // try to parse lots of 'e', but the token isn't finished
|
||
|
+ for (int c = 0; c < 100; ++c) {
|
||
|
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ assert_true(testdata.count == 1); // *still* just the first one
|
||
|
+
|
||
|
+ // end the big token.
|
||
|
+ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (enabled) {
|
||
|
+ // In general, we may need to push more data to trigger a reparse attempt,
|
||
|
+ // but in this test, the data is constructed to always require it.
|
||
|
+ assert_true(testdata.count == 1); // or the test is incorrect
|
||
|
+ // 2x the token length should suffice; the +1 covers the start and end.
|
||
|
+ for (int c = 0; c < 101; ++c) {
|
||
|
+ status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ assert_true(testdata.count == 2); // the big token should be done
|
||
|
+
|
||
|
+ // parse the final text
|
||
|
+ status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ return XML_STATUS_OK;
|
||
|
+}
|
||
|
+
|
||
|
+START_TEST(test_reparse_deferral_is_inherited) {
|
||
|
+ const char *const text
|
||
|
+ = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
|
||
|
+ for (int enabled = 0; enabled <= 1; ++enabled) {
|
||
|
+
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ XML_SetUserData(parser, (void *)&enabled);
|
||
|
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||
|
+ // this handler creates a sub-parser and checks that its deferral behavior
|
||
|
+ // is what we expected, based on the value of `enabled` (in userdata).
|
||
|
+ XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
|
||
|
+ if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
|
||
|
+ xml_failure(parser);
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+START_TEST(test_set_reparse_deferral_on_null_parser) {
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
|
||
|
+ == XML_FALSE);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
|
||
|
+ == XML_FALSE);
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+START_TEST(test_set_reparse_deferral_on_the_fly) {
|
||
|
+ const char *const pre = "<d><x attr='";
|
||
|
+ const char *const end = "'></x>";
|
||
|
+ char iiiiii[100];
|
||
|
+ const int fillsize = (int)sizeof(iiiiii);
|
||
|
+ memset(iiiiii, 'i', fillsize);
|
||
|
+
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
|
||
|
+
|
||
|
+ CharData storage;
|
||
|
+ CharData_Init(&storage);
|
||
|
+ XML_SetUserData(parser, &storage);
|
||
|
+ XML_SetStartElementHandler(parser, start_element_event_handler);
|
||
|
+
|
||
|
+ enum XML_Status status;
|
||
|
+ // parse the start text
|
||
|
+ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
|
||
|
+
|
||
|
+ // try to parse some 'i', but the token isn't finished
|
||
|
+ status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
|
||
|
+
|
||
|
+ // end the <x> token.
|
||
|
+ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
|
||
|
+
|
||
|
+ // now change the heuristic setting and add *no* data
|
||
|
+ assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
|
||
|
+ // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
|
||
|
+ status = XML_Parse(parser, "", 0, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ CharData_CheckXMLChars(&storage, XCS("dx"));
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+START_TEST(test_set_bad_reparse_option) {
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
|
||
|
+ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
|
||
|
+ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
|
||
|
+ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
|
||
|
+ XML_ParserFree(parser);
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+static size_t g_totalAlloc = 0;
|
||
|
+static size_t g_biggestAlloc = 0;
|
||
|
+
|
||
|
+static void *
|
||
|
+counting_realloc(void *ptr, size_t size) {
|
||
|
+ g_totalAlloc += size;
|
||
|
+ if (size > g_biggestAlloc) {
|
||
|
+ g_biggestAlloc = size;
|
||
|
+ }
|
||
|
+ return realloc(ptr, size);
|
||
|
+}
|
||
|
+
|
||
|
+static void *
|
||
|
+counting_malloc(size_t size) {
|
||
|
+ return counting_realloc(NULL, size);
|
||
|
+}
|
||
|
+
|
||
|
+START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
|
||
|
+ if (! g_reparseDeferralEnabledDefault) {
|
||
|
+ return; // this test is irrelevant when the deferral heuristic is disabled.
|
||
|
+ }
|
||
|
+
|
||
|
+ const int document_length = 65536;
|
||
|
+ char *const document = (char *)malloc(document_length);
|
||
|
+
|
||
|
+ const XML_Memory_Handling_Suite memfuncs = {
|
||
|
+ counting_malloc,
|
||
|
+ counting_realloc,
|
||
|
+ free,
|
||
|
+ };
|
||
|
+
|
||
|
+ const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
|
||
|
+ const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
|
||
|
+ const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
|
||
|
+
|
||
|
+ for (const int *leading = leading_list; *leading >= 0; leading++) {
|
||
|
+ for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
|
||
|
+ for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
|
||
|
+ // start by checking that the test looks reasonably valid
|
||
|
+ assert_true(*leading + *bigtoken <= document_length);
|
||
|
+
|
||
|
+ // put 'x' everywhere; some will be overwritten by elements.
|
||
|
+ memset(document, 'x', document_length);
|
||
|
+ // maybe add an initial tag
|
||
|
+ if (*leading) {
|
||
|
+ assert_true(*leading >= 3); // or the test case is invalid
|
||
|
+ memcpy(document, "<a>", 3);
|
||
|
+ }
|
||
|
+ // add the large token
|
||
|
+ document[*leading + 0] = '<';
|
||
|
+ document[*leading + 1] = 'b';
|
||
|
+ memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
|
||
|
+ document[*leading + *bigtoken - 1] = '>';
|
||
|
+
|
||
|
+ // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
|
||
|
+ const int expected_elem_total = 1 + (*leading ? 1 : 0);
|
||
|
+
|
||
|
+ XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+
|
||
|
+ CharData storage;
|
||
|
+ CharData_Init(&storage);
|
||
|
+ XML_SetUserData(parser, &storage);
|
||
|
+ XML_SetStartElementHandler(parser, start_element_event_handler);
|
||
|
+
|
||
|
+ g_biggestAlloc = 0;
|
||
|
+ g_totalAlloc = 0;
|
||
|
+ int offset = 0;
|
||
|
+ // fill data until the big token is covered (but not necessarily parsed)
|
||
|
+ while (offset < *leading + *bigtoken) {
|
||
|
+ assert_true(offset + *fillsize <= document_length);
|
||
|
+ const enum XML_Status status
|
||
|
+ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ offset += *fillsize;
|
||
|
+ }
|
||
|
+ // Now, check that we've had a buffer allocation that could fit the
|
||
|
+ // context bytes and our big token. In order to detect a special case,
|
||
|
+ // we need to know how many bytes of our big token were included in the
|
||
|
+ // first push that contained _any_ bytes of the big token:
|
||
|
+ const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
|
||
|
+ if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
|
||
|
+ // Special case: we aren't saving any context, and the whole big token
|
||
|
+ // was covered by a single fill, so Expat may have parsed directly
|
||
|
+ // from our input pointer, without allocating an internal buffer.
|
||
|
+ } else if (*leading < XML_CONTEXT_BYTES) {
|
||
|
+ assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
|
||
|
+ } else {
|
||
|
+ assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
|
||
|
+ }
|
||
|
+ // fill data until the big token is actually parsed
|
||
|
+ while (storage.count < expected_elem_total) {
|
||
|
+ const size_t alloc_before = g_totalAlloc;
|
||
|
+ assert_true(offset + *fillsize <= document_length);
|
||
|
+ const enum XML_Status status
|
||
|
+ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ offset += *fillsize;
|
||
|
+ // since all the bytes of the big token are already in the buffer,
|
||
|
+ // the bufsize ceiling should make us finish its parsing without any
|
||
|
+ // further buffer allocations. We assume that there will be no other
|
||
|
+ // large allocations in this test.
|
||
|
+ assert_true(g_totalAlloc - alloc_before < 4096);
|
||
|
+ }
|
||
|
+ // test-the-test: was our alloc even called?
|
||
|
+ assert_true(g_totalAlloc > 0);
|
||
|
+ // test-the-test: there shouldn't be any extra start elements
|
||
|
+ assert_true(storage.count == expected_elem_total);
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+ free(document);
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+START_TEST(test_varying_buffer_fills) {
|
||
|
+ const int KiB = 1024;
|
||
|
+ const int MiB = 1024 * KiB;
|
||
|
+ const int document_length = 16 * MiB;
|
||
|
+ const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
|
||
|
+
|
||
|
+ char *const document = (char *)malloc(document_length);
|
||
|
+ assert_true(document != NULL);
|
||
|
+ memset(document, 'x', document_length);
|
||
|
+ document[0] = '<';
|
||
|
+ document[1] = 't';
|
||
|
+ memset(&document[2], ' ', big - 2); // a very spacy token
|
||
|
+ document[big - 1] = '>';
|
||
|
+
|
||
|
+ // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
|
||
|
+ // When reparse deferral is enabled, the final (negated) value is the expected
|
||
|
+ // maximum number of bytes scanned in parse attempts.
|
||
|
+ const int testcases[][30] = {
|
||
|
+ {8 * MiB, -8 * MiB},
|
||
|
+ {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4 MiB, then 8 MiB = 12 MiB total
|
||
|
+ // zero-size fills shouldn't trigger the bypass
|
||
|
+ {4 * MiB, 0, 4 * MiB, -12 * MiB},
|
||
|
+ {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
|
||
|
+ {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
|
||
|
+ // try to hit the buffer ceiling only once (at the end)
|
||
|
+ {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
|
||
|
+ // try to hit the same buffer ceiling multiple times
|
||
|
+ {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
|
||
|
+
|
||
|
+ // try to hit every ceiling, by always landing 1K shy of the buffer size
|
||
|
+ {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
|
||
|
+ 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
|
||
|
+
|
||
|
+ // try to avoid every ceiling, by always landing 1B past the buffer size
|
||
|
+ // the normal 2x heuristic threshold still forces parse attempts.
|
||
|
+ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
|
||
|
+ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
|
||
|
+ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
|
||
|
+ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
|
||
|
+ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
|
||
|
+ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
|
||
|
+ 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
|
||
|
+ -(10 * MiB + 682 * KiB + 7)},
|
||
|
+ // try to avoid every ceiling again, except on our last fill.
|
||
|
+ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
|
||
|
+ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
|
||
|
+ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
|
||
|
+ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
|
||
|
+ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
|
||
|
+ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
|
||
|
+ 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
|
||
|
+ -(10 * MiB + 682 * KiB + 6)},
|
||
|
+
|
||
|
+ // try to hit ceilings on the way multiple times
|
||
|
+ {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
|
||
|
+ 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
|
||
|
+ 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
|
||
|
+ 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
|
||
|
+ // we'll make a parse attempt at every parse call
|
||
|
+ -(45 * MiB + 12)},
|
||
|
+ };
|
||
|
+ const int testcount = sizeof(testcases) / sizeof(testcases[0]);
|
||
|
+ for (int test_i = 0; test_i < testcount; test_i++) {
|
||
|
+ const int *fillsize = testcases[test_i];
|
||
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
||
|
+ assert_true(parser != NULL);
|
||
|
+ g_parseAttempts = 0;
|
||
|
+
|
||
|
+ CharData storage;
|
||
|
+ CharData_Init(&storage);
|
||
|
+ XML_SetUserData(parser, &storage);
|
||
|
+ XML_SetStartElementHandler(parser, start_element_event_handler);
|
||
|
+
|
||
|
+ int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
|
||
|
+ int scanned_bytes = 0; // sum of (buffered bytes at each actual parse)
|
||
|
+ int offset = 0;
|
||
|
+ while (*fillsize >= 0) {
|
||
|
+ assert_true(offset + *fillsize <= document_length); // or test is invalid
|
||
|
+ const unsigned attempts_before = g_parseAttempts;
|
||
|
+ const enum XML_Status status
|
||
|
+ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
|
||
|
+ if (status != XML_STATUS_OK) {
|
||
|
+ xml_failure(parser);
|
||
|
+ }
|
||
|
+ offset += *fillsize;
|
||
|
+ fillsize++;
|
||
|
+ assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
|
||
|
+ worstcase_bytes += offset; // we might've tried to parse all pending bytes
|
||
|
+ if (g_parseAttempts != attempts_before) {
|
||
|
+ assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
|
||
|
+ assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow
|
||
|
+ scanned_bytes += offset; // we *did* try to parse all pending bytes
|
||
|
+ }
|
||
|
+ }
|
||
|
+ assert_true(storage.count == 1); // the big token should've been parsed
|
||
|
+ assert_true(scanned_bytes > 0); // test-the-test: does our counter work?
|
||
|
+ if (g_reparseDeferralEnabledDefault) {
|
||
|
+ // heuristic is enabled; some XML_Parse calls may have deferred reparsing
|
||
|
+ const int max_bytes_scanned = -*fillsize;
|
||
|
+ if (scanned_bytes > max_bytes_scanned) {
|
||
|
+ fprintf(stderr,
|
||
|
+ "bytes scanned in parse attempts: actual=%d limit=%d \n",
|
||
|
+ scanned_bytes, max_bytes_scanned);
|
||
|
+ fail("too many bytes scanned in parse attempts");
|
||
|
+ }
|
||
|
+ assert_true(scanned_bytes <= worstcase_bytes);
|
||
|
+ } else {
|
||
|
+ // heuristic is disabled; every XML_Parse() will have reparsed
|
||
|
+ assert_true(scanned_bytes == worstcase_bytes);
|
||
|
+ }
|
||
|
+
|
||
|
+ XML_ParserFree(parser);
|
||
|
+ }
|
||
|
+ free(document);
|
||
|
+}
|
||
|
+END_TEST
|
||
|
+
|
||
|
+
|
||
|
/*
|
||
|
* Namespaces tests.
|
||
|
*/
|
||
|
@@ -6902,13 +7529,13 @@ START_TEST(test_return_ns_triplet) {
|
||
|
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
|
||
|
== XML_STATUS_ERROR)
|
||
|
xml_failure(g_parser);
|
||
|
- if (! triplet_start_flag)
|
||
|
- fail("triplet_start_checker not invoked");
|
||
|
/* Check that unsetting "return triplets" fails while still parsing */
|
||
|
XML_SetReturnNSTriplet(g_parser, XML_FALSE);
|
||
|
if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
|
||
|
== XML_STATUS_ERROR)
|
||
|
xml_failure(g_parser);
|
||
|
+ if (! triplet_start_flag)
|
||
|
+ fail("triplet_start_checker not invoked");
|
||
|
if (! triplet_end_flag)
|
||
|
fail("triplet_end_checker not invoked");
|
||
|
if (dummy_handler_flags
|
||
|
@@ -12219,6 +12846,7 @@ make_suite(void) {
|
||
|
#if defined(XML_CONTEXT_BYTES)
|
||
|
tcase_add_test(tc_basic, test_get_buffer_3_overflow);
|
||
|
#endif
|
||
|
+ tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
|
||
|
tcase_add_test(tc_basic, test_byte_info_at_end);
|
||
|
tcase_add_test(tc_basic, test_byte_info_at_error);
|
||
|
tcase_add_test(tc_basic, test_byte_info_at_cdata);
|
||
|
@@ -12337,7 +12965,14 @@ make_suite(void) {
|
||
|
tcase_add_test__ifdef_xml_dtd(tc_basic,
|
||
|
test_pool_integrity_with_unfinished_attr);
|
||
|
tcase_add_test(tc_basic, test_nested_entity_suspend);
|
||
|
-
|
||
|
+ tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
|
||
|
+ tcase_add_test(tc_basic, test_set_reparse_deferral);
|
||
|
+ tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
|
||
|
+ tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
|
||
|
+ tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
|
||
|
+ tcase_add_test(tc_basic, test_set_bad_reparse_option);
|
||
|
+ tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
|
||
|
+ tcase_add_test(tc_basic, test_varying_buffer_fills);
|
||
|
suite_add_tcase(s, tc_namespace);
|
||
|
tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown);
|
||
|
tcase_add_test(tc_namespace, test_return_ns_triplet);
|
||
|
diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c
|
||
|
index 471f2a2..7c62919 100644
|
||
|
--- a/expat/xmlwf/xmlwf.c
|
||
|
+++ b/expat/xmlwf/xmlwf.c
|
||
|
@@ -914,6 +914,9 @@ usage(const XML_Char *prog, int rc) {
|
||
|
T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
|
||
|
T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
|
||
|
T("\n")
|
||
|
+ T("reparse deferral:\n")
|
||
|
+ T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
|
||
|
+ T("\n")
|
||
|
T("info arguments:\n")
|
||
|
T(" -h show this [h]elp message and exit\n")
|
||
|
T(" -v show program's [v]ersion number and exit\n")
|
||
|
@@ -967,6 +970,8 @@ tmain(int argc, XML_Char **argv) {
|
||
|
unsigned long long attackThresholdBytes;
|
||
|
XML_Bool attackThresholdGiven = XML_FALSE;
|
||
|
|
||
|
+ XML_Bool disableDeferral = XML_FALSE;
|
||
|
+
|
||
|
int exitCode = XMLWF_EXIT_SUCCESS;
|
||
|
enum XML_ParamEntityParsing paramEntityParsing
|
||
|
= XML_PARAM_ENTITY_PARSING_NEVER;
|
||
|
@@ -1089,6 +1094,11 @@ tmain(int argc, XML_Char **argv) {
|
||
|
#endif
|
||
|
break;
|
||
|
}
|
||
|
+ case T('q'): {
|
||
|
+ disableDeferral = XML_TRUE;
|
||
|
+ j++;
|
||
|
+ break;
|
||
|
+ }
|
||
|
case T('\0'):
|
||
|
if (j > 1) {
|
||
|
i++;
|
||
|
@@ -1134,6 +1144,16 @@ tmain(int argc, XML_Char **argv) {
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
+ if (disableDeferral) {
|
||
|
+ const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
|
||
|
+ if (! success) {
|
||
|
+ // This prevents tperror(..) from reporting misleading "[..]: Success"
|
||
|
+ errno = EINVAL;
|
||
|
+ tperror(T("Failed to disable reparse deferral"));
|
||
|
+ exit(XMLWF_EXIT_INTERNAL_ERROR);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
if (requireStandalone)
|
||
|
XML_SetNotStandaloneHandler(parser, notStandalone);
|
||
|
XML_SetParamEntityParsing(parser, paramEntityParsing);
|
||
|
diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py
|
||
|
index c2a527f..1bd0a0a 100755
|
||
|
--- a/expat/xmlwf/xmlwf_helpgen.py
|
||
|
+++ b/expat/xmlwf/xmlwf_helpgen.py
|
||
|
@@ -81,6 +81,10 @@ billion_laughs.add_argument('-a', metavar='FACTOR',
|
||
|
help='set maximum tolerated [a]mplification factor (default: 100.0)')
|
||
|
billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)')
|
||
|
|
||
|
+reparse_deferral = parser.add_argument_group('reparse deferral')
|
||
|
+reparse_deferral.add_argument('-q', action='store_true',
|
||
|
+ help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens')
|
||
|
+
|
||
|
parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)')
|
||
|
|
||
|
info = parser.add_argument_group('info arguments')
|
||
|
diff --git a/testdata/largefiles/aaaaaa_attr.xml b/testdata/largefiles/aaaaaa_attr.xml
|
||
|
new file mode 100644
|
||
|
index 0000000..66e3d25
|
||
|
--- /dev/null
|
||
|
+++ b/testdata/largefiles/aaaaaa_attr.xml
|
||
|
@@ -0,0 +1 @@
|
||
|
+<myFile><attack value="ACHARS"></attack></myFile>
|
||
|
\ No newline at end of file
|
||
|
diff --git a/testdata/largefiles/aaaaaa_cdata.xml b/testdata/largefiles/aaaaaa_cdata.xml
|
||
|
new file mode 100644
|
||
|
index 0000000..66f64bd
|
||
|
--- /dev/null
|
||
|
+++ b/testdata/largefiles/aaaaaa_cdata.xml
|
||
|
@@ -0,0 +1 @@
|
||
|
+<myFile><attack><![CDATA[ value: ACHARS ]]></attack></myFile>
|
||
|
\ No newline at end of file
|
||
|
diff --git a/testdata/largefiles/aaaaaa_comment.xml b/testdata/largefiles/aaaaaa_comment.xml
|
||
|
new file mode 100644
|
||
|
index 0000000..bb9af13
|
||
|
--- /dev/null
|
||
|
+++ b/testdata/largefiles/aaaaaa_comment.xml
|
||
|
@@ -0,0 +1 @@
|
||
|
+<myFile><attack><!-- value: ACHARS --></attack></myFile>
|
||
|
\ No newline at end of file
|
||
|
diff --git a/testdata/largefiles/aaaaaa_tag.xml b/testdata/largefiles/aaaaaa_tag.xml
|
||
|
new file mode 100644
|
||
|
index 0000000..946f701
|
||
|
--- /dev/null
|
||
|
+++ b/testdata/largefiles/aaaaaa_tag.xml
|
||
|
@@ -0,0 +1 @@
|
||
|
+<myFile><attack><ACHARS /></attack></myFile>
|
||
|
\ No newline at end of file
|
||
|
diff --git a/testdata/largefiles/aaaaaa_text.xml b/testdata/largefiles/aaaaaa_text.xml
|
||
|
new file mode 100644
|
||
|
index 0000000..e266acb
|
||
|
--- /dev/null
|
||
|
+++ b/testdata/largefiles/aaaaaa_text.xml
|
||
|
@@ -0,0 +1 @@
|
||
|
+<myFile><attack>ACHARS</attack></myFile>
|
||
|
\ No newline at end of file
|