expat/expat-2.5.0-CVE-2024-8176.patch
Tomas Korbar 3dc8bb2fd0 Fix CVE-2024-8176
Resolves: RHEL-57489
2025-03-31 15:44:25 +02:00

1536 lines
57 KiB
Diff

commit c0de4903900004dd3ca91f246e5f6489a49a132b
Author: Tomas Korbar <tkorbar@redhat.com>
Date: Mon Mar 24 10:04:33 2025 +0100
Fix CVE-2024-8176
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
index 8b2af91..d68d2c8 100644
--- a/expat/lib/xmlparse.c
+++ b/expat/lib/xmlparse.c
@@ -305,6 +305,10 @@ typedef struct {
const XML_Char *publicId;
const XML_Char *notation;
XML_Bool open;
+ XML_Bool hasMore; /* true if entity has not been completely processed */
+ /* An entity can be open while being already completely processed (hasMore ==
+ XML_FALSE). The reason is the delayed closing of entities until their inner
+ entities are processed and closed */
XML_Bool is_param;
XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
@@ -395,6 +399,12 @@ typedef struct {
int *scaffIndex;
} DTD;
+enum EntityType {
+ ENTITY_INTERNAL,
+ ENTITY_ATTRIBUTE,
+ ENTITY_VALUE,
+};
+
typedef struct open_internal_entity {
const char *internalEventPtr;
const char *internalEventEndPtr;
@@ -402,6 +412,7 @@ typedef struct open_internal_entity {
ENTITY *entity;
int startTagLevel;
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
+ enum EntityType type;
} OPEN_INTERNAL_ENTITY;
enum XML_Account {
@@ -461,8 +472,8 @@ static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
const char *next, const char **nextPtr,
XML_Bool haveMore, XML_Bool allowClosingDoctype,
enum XML_Account account);
-static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
- XML_Bool betweenDecl);
+static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
+ XML_Bool betweenDecl, enum EntityType type);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
@@ -492,16 +503,21 @@ static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
XML_Bool isCdata, const char *,
const char *, STRING_POOL *,
enum XML_Account account);
-static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
- XML_Bool isCdata, const char *,
- const char *, STRING_POOL *,
- enum XML_Account account);
+static enum XML_Error
+appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account, const char **nextPtr);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end,
- enum XML_Account account);
+ enum XML_Account account,
+ const char **nextPtr);
+static enum XML_Error callStoreEntityValue(XML_Parser parser,
+ const ENCODING *enc,
+ const char *start, const char *end,
+ enum XML_Account account);
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
@@ -669,6 +685,10 @@ struct XML_ParserStruct {
const char *m_positionPtr;
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
+ OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_openValueEntities;
+ OPEN_INTERNAL_ENTITY *m_freeValueEntities;
XML_Bool m_defaultExpandInternalEntities;
int m_tagLevel;
ENTITY *m_declEntity;
@@ -716,6 +736,7 @@ struct XML_ParserStruct {
ACCOUNTING m_accounting;
ENTITY_STATS m_entity_stats;
#endif
+ XML_Bool m_reenter;
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -986,7 +1007,29 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
}
}
g_parseAttempts += 1;
- const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ // Run in a loop to eliminate dangerous recursion depths
+ enum XML_Error ret;
+ *endPtr = start;
+ while (1) {
+ // Use endPtr as the new start in each iteration, since it will
+ // be set to the next start point by m_processor.
+ ret = parser->m_processor(parser, *endPtr, end, endPtr);
+
+ // Make parsing status (and in particular XML_SUSPENDED) take
+ // precedence over re-enter flag when they disagree
+ if (parser->m_parsingStatus.parsing != XML_PARSING) {
+ parser->m_reenter = XML_FALSE;
+ }
+
+ if (! parser->m_reenter) {
+ break;
+ }
+
+ parser->m_reenter = XML_FALSE;
+ if (ret != XML_ERROR_NONE)
+ return ret;
+ }
+
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
if (*endPtr == start) {
@@ -1097,6 +1140,8 @@ parserCreate(const XML_Char *encodingName,
parser->m_freeBindingList = NULL;
parser->m_freeTagList = NULL;
parser->m_freeInternalEntities = NULL;
+ parser->m_freeAttributeEntities = NULL;
+ parser->m_freeValueEntities = NULL;
parser->m_groupSize = 0;
parser->m_groupConnector = NULL;
@@ -1199,6 +1244,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
parser->m_openInternalEntities = NULL;
+ parser->m_openAttributeEntities = NULL;
+ parser->m_openValueEntities = NULL;
parser->m_defaultExpandInternalEntities = XML_TRUE;
parser->m_tagLevel = 0;
parser->m_tagStack = NULL;
@@ -1209,6 +1256,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_unknownEncodingData = NULL;
parser->m_parentParser = NULL;
parser->m_parsingStatus.parsing = XML_INITIALIZED;
+ // Reentry can only be triggered inside m_processor calls
+ parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
parser->m_isParamEntity = XML_FALSE;
parser->m_useForeignDTD = XML_FALSE;
@@ -1268,6 +1317,24 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
}
+ /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
+ * for attributes) */
+ openEntityList = parser->m_openAttributeEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+ /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
+ * for value entities) */
+ openEntityList = parser->m_openValueEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
moveToFreeBindingList(parser, parser->m_inheritedBindings);
FREE(parser, parser->m_unknownEncodingMem);
if (parser->m_unknownEncodingRelease)
@@ -1281,6 +1348,19 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
return XML_TRUE;
}
+static XML_Bool
+parserBusy(XML_Parser parser) {
+ switch (parser->m_parsingStatus.parsing) {
+ case XML_PARSING:
+ case XML_SUSPENDED:
+ return XML_TRUE;
+ case XML_INITIALIZED:
+ case XML_FINISHED:
+ default:
+ return XML_FALSE;
+ }
+}
+
enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
if (parser == NULL)
@@ -1289,8 +1369,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
XXX There's no way for the caller to determine which of the
XXX possible error cases caused the XML_STATUS_ERROR return.
*/
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_STATUS_ERROR;
/* Get rid of any previous encoding name */
@@ -1527,7 +1606,34 @@ XML_ParserFree(XML_Parser parser) {
entityList = entityList->next;
FREE(parser, openEntity);
}
-
+ /* free m_openAttributeEntities and m_freeAttributeEntities */
+ entityList = parser->m_openAttributeEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeAttributeEntities == NULL)
+ break;
+ entityList = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
+ /* free m_openValueEntities and m_freeValueEntities */
+ entityList = parser->m_openValueEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeValueEntities == NULL)
+ break;
+ entityList = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
destroyBindings(parser->m_freeBindingList, parser);
destroyBindings(parser->m_inheritedBindings, parser);
poolDestroy(&parser->m_tempPool);
@@ -1569,8 +1675,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
return XML_ERROR_INVALID_ARGUMENT;
#ifdef XML_DTD
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
@@ -1585,8 +1690,7 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
if (parser == NULL)
return;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return;
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
}
@@ -1855,8 +1959,7 @@ XML_SetParamEntityParsing(XML_Parser parser,
if (parser == NULL)
return 0;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
#ifdef XML_DTD
parser->m_paramEntityParsing = peParsing;
@@ -1873,8 +1976,7 @@ XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
if (parser->m_parentParser)
return XML_SetHashSalt(parser->m_parentParser, hash_salt);
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
parser->m_hash_secret_salt = hash_salt;
return 1;
@@ -2188,6 +2290,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
return parser->m_bufferEnd;
}
+static void
+triggerReenter(XML_Parser parser) {
+ parser->m_reenter = XML_TRUE;
+}
+
enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
@@ -2659,8 +2766,9 @@ static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
enum XML_Error result = doContent(
- parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
+ parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
+ endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2748,6 +2856,11 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:
start = next;
}
@@ -2921,7 +3034,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
reportDefault(parser, enc, s, next);
break;
}
- result = processInternalEntity(parser, entity, XML_FALSE);
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
} else if (parser->m_externalEntityRefHandler) {
@@ -3047,7 +3160,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3108,7 +3223,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3241,14 +3358,22 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
break;
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4165,14 +4290,21 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4504,7 +4636,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
}
/* found end of entity value - can store it now */
return storeEntityValue(parser, parser->m_encoding, s, end,
- XML_ACCOUNT_DIRECT);
+ XML_ACCOUNT_DIRECT, NULL);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -4631,7 +4763,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
}
start = next;
}
@@ -5069,9 +5201,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
break;
case XML_ROLE_ENTITY_VALUE:
if (dtd->keepProcessing) {
- enum XML_Error result
- = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
- next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
+ enum XML_Error result = callStoreEntityValue(
+ parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
+ XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -5467,7 +5599,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
enum XML_Error result;
XML_Bool betweenDecl
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
- result = processInternalEntity(parser, entity, betweenDecl);
+ result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
handleDefault = XML_FALSE;
@@ -5672,6 +5804,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:
s = next;
tok = XmlPrologTok(enc, s, end, &next);
@@ -5739,28 +5877,58 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end,
default:
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
}
- parser->m_eventPtr = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ parser->m_eventPtr = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ parser->m_eventPtr = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ parser->m_eventPtr = s = next;
}
}
}
static enum XML_Error
-processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
- const char *textStart, *textEnd;
- const char *next;
- enum XML_Error result;
- OPEN_INTERNAL_ENTITY *openEntity;
+processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
+ enum EntityType type) {
+ OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
+ switch (type) {
+ case ENTITY_INTERNAL:
+ parser->m_processor = internalEntityProcessor;
+ openEntityList = &parser->m_openInternalEntities;
+ freeEntityList = &parser->m_freeInternalEntities;
+ break;
+ case ENTITY_ATTRIBUTE:
+ openEntityList = &parser->m_openAttributeEntities;
+ freeEntityList = &parser->m_freeAttributeEntities;
+ break;
+ case ENTITY_VALUE:
+ openEntityList = &parser->m_openValueEntities;
+ freeEntityList = &parser->m_freeValueEntities;
+ break;
+ /* default case serves merely as a safety net in case of a
+ * wrong entityType. Therefore we exclude the following lines
+ * from the test coverage.
+ *
+ * LCOV_EXCL_START
+ */
+ default:
+ // Should not reach here
+ assert(0);
+ /* LCOV_EXCL_STOP */
+ }
- if (parser->m_freeInternalEntities) {
- openEntity = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity->next;
+ if (*freeEntityList) {
+ openEntity = *freeEntityList;
+ *freeEntityList = openEntity->next;
} else {
openEntity
= (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
@@ -5768,56 +5936,33 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+ entity->hasMore = XML_TRUE;
#ifdef XML_DTD
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
entity->processed = 0;
- openEntity->next = parser->m_openInternalEntities;
- parser->m_openInternalEntities = openEntity;
+ openEntity->next = *openEntityList;
+ *openEntityList = openEntity;
openEntity->entity = entity;
+ openEntity->type = type;
openEntity->startTagLevel = parser->m_tagLevel;
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (const char *)entity->textPtr;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else
-#endif /* XML_DTD */
- result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
-
- if (result == XML_ERROR_NONE) {
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - textStart);
- parser->m_processor = internalEntityProcessor;
- } else {
-#ifdef XML_DTD
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif /* XML_DTD */
- entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
- /* put openEntity back in list of free instances */
- openEntity->next = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity;
- }
+ // Only internal entities make use of the reenter flag
+ // therefore no need to set it for other entity types
+ if (type == ENTITY_INTERNAL) {
+ triggerReenter(parser);
}
- return result;
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
const char **nextPtr) {
+ UNUSED_P(s);
+ UNUSED_P(end);
+ UNUSED_P(nextPtr);
ENTITY *entity;
const char *textStart, *textEnd;
const char *next;
@@ -5827,72 +5972,63 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((const char *)entity->textPtr) + entity->processed;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else
-#endif /* XML_DTD */
- result = doContent(parser, openEntity->startTagLevel,
- parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
-
- if (result != XML_ERROR_NONE)
- return result;
-
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (const char *)entity->textPtr);
+ // This will return early
+ if (entity->hasMore) {
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ next = textStart;
+
+ if (entity->is_param) {
+ int tok
+ = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
+ result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
+ result = doContent(parser, openEntity->startTagLevel,
+ parser->m_internalEncoding, textStart, textEnd, &next,
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ }
+
+ if (result != XML_ERROR_NONE)
+ return result;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed
+ if (textEnd != next
+ && (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))) {
+ entity->processed = (int)(next - (const char *)entity->textPtr);
+ return result;
+ }
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openInternalEntities during doProlog or doContent calls above)
+ entity->hasMore = XML_FALSE;
+ triggerReenter(parser);
return result;
- }
-
+ } // End of entity processing, "if" block will return here
+ // Remove fully processed openEntity from open entity list.
#ifdef XML_DTD
entityTrackingOnClose(parser, entity, __LINE__);
#endif
+ // openEntity is m_openInternalEntities' head, as we set it at the start of
+ // this function and we skipped doProlog and doContent calls with hasMore set
+ // to false. This means we can directly remove the head of
+ // m_openInternalEntities
+ assert(parser->m_openInternalEntities == openEntity);
entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
+ parser->m_openInternalEntities = parser->m_openInternalEntities->next;
/* put openEntity back in list of free instances */
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
- // If there are more open entities we want to stop right here and have the
- // upcoming call to XML_ResumeParser continue with entity content, or it would
- // be ignored altogether.
- if (parser->m_openInternalEntities != NULL
- && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- return XML_ERROR_NONE;
- }
-
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok;
- parser->m_processor = prologProcessor;
- tok = XmlPrologTok(parser->m_encoding, s, end, &next);
- return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
- XML_ACCOUNT_DIRECT);
- } else
-#endif /* XML_DTD */
- {
- parser->m_processor = contentProcessor;
- /* see externalEntityContentProcessor vs contentProcessor */
- result = doContent(parser, parser->m_parentParser ? 1 : 0,
- parser->m_encoding, s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer,
- XML_ACCOUNT_DIRECT);
- if (result == XML_ERROR_NONE) {
- if (! storeRawNames(parser))
- return XML_ERROR_NO_MEMORY;
- }
- return result;
+ if (parser->m_openInternalEntities == NULL) {
+ parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
}
+ triggerReenter(parser);
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
@@ -5908,8 +6044,70 @@ static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
enum XML_Account account) {
- enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
+ const char *next = ptr;
+ enum XML_Error result = XML_ERROR_NONE;
+
+ while (1) {
+ if (! parser->m_openAttributeEntities) {
+ result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
+ account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = appendAttributeValue(
+ parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
+ pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openAttributeEntities during appendAttributeValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+#if XML_DTD == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
+ // openEntity is m_openAttributeEntities' head, since we set it at the
+ // start of this function and because we skipped appendAttributeValue call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openAttributeEntities
+ assert(parser->m_openAttributeEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
+ break;
+ }
+ }
+
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5922,7 +6120,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
UNUSED_P(account);
@@ -5940,6 +6138,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
#endif
switch (tok) {
case XML_TOK_NONE:
+ if (nextPtr) {
+ *nextPtr = next;
+ }
return XML_ERROR_NONE;
case XML_TOK_INVALID:
if (enc == parser->m_encoding)
@@ -6080,21 +6281,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
} else {
enum XML_Error result;
- const XML_Char *textEnd = entity->textPtr + entity->textLen;
- entity->open = XML_TRUE;
-#ifdef XML_DTD
- entityTrackingOnOpen(parser, entity, __LINE__);
-#endif
- result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (const char *)entity->textPtr,
- (const char *)textEnd, pool,
- XML_ACCOUNT_ENTITY_EXPANSION);
-#ifdef XML_DTD
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif
- entity->open = XML_FALSE;
- if (result)
- return result;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
+ if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
+ *nextPtr = next;
+ }
+ return result;
}
} break;
default:
@@ -6122,7 +6313,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *entityTextPtr, const char *entityTextEnd,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
@@ -6140,8 +6331,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
return XML_ERROR_NO_MEMORY;
}
+ const char *next;
for (;;) {
- const char *next
+ next
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
@@ -6205,16 +6397,8 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
} else
dtd->keepProcessing = dtd->standalone;
} else {
- entity->open = XML_TRUE;
- entityTrackingOnOpen(parser, entity, __LINE__);
- result = storeEntityValue(
- parser, parser->m_internalEncoding, (const char *)entity->textPtr,
- (const char *)(entity->textPtr + entity->textLen),
- XML_ACCOUNT_ENTITY_EXPANSION);
- entityTrackingOnClose(parser, entity, __LINE__);
- entity->open = XML_FALSE;
- if (result)
- goto endEntityValue;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
+ goto endEntityValue;
}
break;
}
@@ -6302,9 +6486,84 @@ endEntityValue:
#ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
#endif /* XML_DTD */
+ // If 'nextPtr' is given, it should be updated during the processing
+ if (nextPtr != NULL) {
+ *nextPtr = next;
+ }
return result;
}
+static enum XML_Error
+callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
+ const char *next = entityTextPtr;
+ enum XML_Error result = XML_ERROR_NONE;
+ while (1) {
+ if (! parser->m_openValueEntities) {
+ result
+ = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
+ textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
+ &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openValueEntities during storeEntityValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+# if XML_DTD == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+# endif
+ // openEntity is m_openValueEntities' head, since we set it at the
+ // start of this function and because we skipped storeEntityValue call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openValueEntities
+ assert(parser->m_openValueEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openValueEntities = parser->m_openValueEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result
+ || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
+ break;
+ }
+ }
+
+ return result;
+}
+
static void FASTCALL
normalizeLines(XML_Char *s) {
XML_Char *p;
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c
index 5769aa0..5db384d 100644
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -47,6 +47,8 @@
#endif
#include <assert.h>
+#include <errno.h>
+#include <stdint.h> // for SIZE_MAX
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -1788,6 +1790,89 @@ START_TEST(test_wfc_no_recursive_entity_refs) {
}
END_TEST
+START_TEST(test_no_indirectly_recursive_entity_refs) {
+ struct TestCase {
+ const char *doc;
+ bool usesParameterEntities;
+ };
+
+ const struct TestCase cases[] = {
+ // general entity + character data
+ {"<!DOCTYPE a [\n"
+ " <!ENTITY e1 '&e2;'>\n"
+ " <!ENTITY e2 '&e1;'>\n"
+ "]><a>&e2;</a>\n",
+ false},
+
+ // general entity + attribute value
+ {"<!DOCTYPE a [\n"
+ " <!ENTITY e1 '&e2;'>\n"
+ " <!ENTITY e2 '&e1;'>\n"
+ "]><a k1='&e2;' />\n",
+ false},
+
+ // parameter entity
+ {"<!DOCTYPE doc [\n"
+ " <!ENTITY % p1 '&#37;p2;'>\n"
+ " <!ENTITY % p2 '&#37;p1;'>\n"
+ " <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
+ " %define_g;\n"
+ "]>\n"
+ "<doc/>\n",
+ true},
+ };
+ const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
+ j++) {
+ const XML_Bool reset_wanted = reset_or_not[j];
+ const char *const doc = cases[i].doc;
+ const bool usesParameterEntities = cases[i].usesParameterEntities;
+
+#ifdef XML_DTD // both GE and DTD
+ const bool rejection_expected = true;
+#else // neither DTD nor GE
+ const bool rejection_expected = false;
+#endif
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+#ifdef XML_DTD
+ if (usesParameterEntities) {
+ assert_true(
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
+ == 1);
+ }
+#else
+ UNUSED_P(usesParameterEntities);
+#endif // XML_DTD
+
+ const enum XML_Status status
+ = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal*/ XML_TRUE);
+
+ if (rejection_expected) {
+ assert_true(status == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
+ } else {
+ assert_true(status == XML_STATUS_OK);
+ }
+
+ if (reset_wanted) {
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserReset.
+ XML_ParserReset(parser, NULL);
+ }
+
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserFree (unless XML_ParserReset has already done that above).
+ XML_ParserFree(parser);
+ }
+ }
+}
+END_TEST
+
START_TEST(test_recursive_external_parameter_entity_2) {
struct TestCase {
const char *doc;
@@ -2958,6 +3043,12 @@ typedef struct elementInfo {
AttrInfo *attributes;
} ElementInfo;
+
+typedef struct StructParserAndElementInfo {
+ XML_Parser parser;
+ ElementInfo *info;
+} ParserAndElementInfo;
+
static void XMLCALL
counting_start_element_handler(void *userData, const XML_Char *name,
const XML_Char **atts) {
@@ -5306,6 +5397,7 @@ END_TEST
typedef struct ext_hdlr_data {
const char *parse_text;
XML_ExternalEntityRefHandler handler;
+ CharData *storage;
} ExtHdlrData;
static int XMLCALL
@@ -5340,7 +5432,7 @@ START_TEST(test_skipped_null_loaded_ext_entity) {
= {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
"<!ENTITY % pe2 '%pe1;'>\n"
"%pe2;\n",
- external_entity_null_loader};
+ external_entity_null_loader, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -5358,7 +5450,7 @@ START_TEST(test_skipped_unloaded_ext_entity) {
= {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
"<!ENTITY % pe2 '%pe1;'>\n"
"%pe2;\n",
- NULL};
+ NULL, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -6448,6 +6540,25 @@ accumulate_entity_decl(void *userData, const XML_Char *entityName,
CharData_AppendXMLChars(storage, XCS("\n"), 1);
}
+typedef struct {
+ XML_Parser parser;
+ CharData *storage;
+} ParserPlusStorage;
+
+static void XMLCALL
+accumulate_char_data_and_suspend(void *userData, const XML_Char *s, int len) {
+ ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData;
+
+ CharData_AppendXMLChars(parserPlusStorage->storage, s, len);
+
+ for (int i = 0; i < len; i++) {
+ if (s[i] == 'Z') {
+ XML_StopParser(parserPlusStorage->parser, /*resumable=*/XML_TRUE);
+ break;
+ }
+ }
+}
+
START_TEST(test_utf16_pe) {
/* <!DOCTYPE doc [
* <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
@@ -6901,11 +7012,6 @@ START_TEST(test_pool_integrity_with_unfinished_attr) {
}
END_TEST
-typedef struct {
- XML_Parser parser;
- CharData *storage;
-} ParserPlusStorage;
-
static void XMLCALL
accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) {
ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData;
@@ -6913,6 +7019,147 @@ accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) {
XML_StopParser(parserPlusStorage->parser, XML_TRUE);
}
+/* Test a possible early return location in internalEntityProcessor */
+START_TEST(test_entity_ref_no_elements) {
+ const char *const text = "<!DOCTYPE foo [\n"
+ "<!ENTITY e1 \"test\">\n"
+ "]> <foo>&e1;"; // intentionally missing newline
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+/* Tests if chained entity references lead to unbounded recursion */
+START_TEST(test_deep_nested_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 50;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "<!DOCTYPE foo [\n"
+ " <!ENTITY s0 'deepText'>\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
+ (long unsigned)(N_LINES - 1));
+
+ const XML_Char *const expected = XCS("deepText");
+
+ CharData storage;
+ CharData_Init(&storage);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+ XML_SetUserData(parser, &storage);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
+/* Tests if chained entity references in attributes
+lead to unbounded recursion */
+START_TEST(test_deep_nested_attribute_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "<!DOCTYPE foo [\n"
+ " <!ENTITY s0 'deepText'>\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
+ (long unsigned)(N_LINES - 1));
+
+ AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
+ ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
+ info[0].attributes = doc_info;
+
+ XML_SetStartElementHandler(g_parser, counting_start_element_handler);
+ XML_SetUserData(g_parser, &info);
+
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+
+ free(text);
+}
+END_TEST
+
+START_TEST(test_deep_nested_entity_delayed_interpretation) {
+ const size_t N_LINES = 70000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "<!DOCTYPE foo [\n"
+ " <!ENTITY %% s0 'deepText'>\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ " <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
+ (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE,
+ " <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
+ " %%define_g;\n"
+ "]>\n"
+ "<foo/>\n",
+ (long unsigned)(N_LINES - 1));
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
START_TEST(test_nested_entity_suspend) {
const char *const text = "<!DOCTYPE a [\n"
" <!ENTITY e1 '<!--e1-->'>\n"
@@ -6943,6 +7190,35 @@ START_TEST(test_nested_entity_suspend) {
}
END_TEST
+START_TEST(test_nested_entity_suspend_2) {
+ const char *const text = "<!DOCTYPE doc [\n"
+ " <!ENTITY ge1 'head1Ztail1'>\n"
+ " <!ENTITY ge2 'head2&ge1;tail2'>\n"
+ " <!ENTITY ge3 'head3&ge2;tail3'>\n"
+ "]>\n"
+ "<doc>&ge3;</doc>";
+ const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
+ XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
+ CharData storage;
+ CharData_Init(&storage);
+ XML_Parser parser = XML_ParserCreate(NULL);
+ ParserPlusStorage parserPlusStorage = {parser, &storage};
+
+ XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
+ XML_SetUserData(parser, &parserPlusStorage);
+
+ enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
+ while (status == XML_STATUS_SUSPENDED) {
+ status = XML_ResumeParser(parser);
+ }
+ if (status != XML_STATUS_OK)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
/* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_take_linear_time) {
const char *const too_slow_failure_message
@@ -8349,6 +8625,29 @@ duff_reallocator(void *ptr, size_t size) {
return realloc(ptr, size);
}
+// Portable remake of strndup(3) for C99; does not care about space efficiency
+static char *
+portable_strndup(const char *s, size_t n) {
+ if ((s == NULL) || (n == SIZE_MAX)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ char *const buffer = (char *)malloc(n + 1);
+ if (buffer == NULL) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ errno = 0;
+
+ memcpy(buffer, s, n);
+
+ buffer[n] = '\0';
+
+ return buffer;
+}
+
/* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser) {
XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
@@ -8723,7 +9022,7 @@ END_TEST
START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
const XML_Bool cases[] = {XML_TRUE, XML_FALSE};
for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
- const XML_Bool resumable = cases[i];
+ resumable = cases[i];
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE);
assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR);
@@ -8733,6 +9032,105 @@ START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
}
END_TEST
+/* Adaptation of accumulate_characters that takes ExtHdlrData input to work with
+ * test_renter_loop_finite_content below */
+static void XMLCALL
+accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) {
+ ExtHdlrData *const test_data = (ExtHdlrData *)userData;
+ CharData_AppendXMLChars(test_data->storage, s, len);
+}
+
+/* Test that internalEntityProcessor does not re-enter forever;
+ * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */
+START_TEST(test_renter_loop_finite_content) {
+ CharData storage;
+ CharData_Init(&storage);
+ const char *const text = "<!DOCTYPE doc [\n"
+ "<!ENTITY e1 '&e2;'>\n"
+ "<!ENTITY e2 '&e3;'>\n"
+ "<!ENTITY e3 SYSTEM '012.ent'>\n"
+ "<!ENTITY e4 '&e5;'>\n"
+ "<!ENTITY e5 '(e5)'>\n"
+ "<!ELEMENT doc (#PCDATA)>\n"
+ "]>\n"
+ "<doc>&e1;</doc>\n";
+ ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage};
+ const XML_Char *const expected = XCS("(e5)\n");
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+ XML_SetUserData(parser, &test_data);
+ XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader);
+ XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+// Inspired by function XML_OriginalString of Perl's XML::Parser
+static char *
+dup_original_string(XML_Parser parser) {
+ const int byte_count = XML_GetCurrentByteCount(parser);
+
+ assert_true(byte_count >= 0);
+
+ int offset = -1;
+ int size = -1;
+
+ const char *const context = XML_GetInputContext(parser, &offset, &size);
+
+#if XML_CONTEXT_BYTES > 0
+ assert_true(context != NULL);
+ assert_true(offset >= 0);
+ assert_true(size >= 0);
+ return portable_strndup(context + offset, byte_count);
+#else
+ assert_true(context == NULL);
+ return NULL;
+#endif
+}
+
+static void
+on_characters_issue_980(void *userData, const XML_Char *s, int len) {
+ (void)s;
+ (void)len;
+ XML_Parser parser = (XML_Parser)userData;
+
+ char *const original_string = dup_original_string(parser);
+
+#if XML_CONTEXT_BYTES > 0
+ assert_true(original_string != NULL);
+ assert_true(strcmp(original_string, "&draft.day;") == 0);
+ free(original_string);
+#else
+ assert_true(original_string == NULL);
+#endif
+}
+
+START_TEST(test_misc_expected_event_ptr_issue_980) {
+ // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml"
+ // from Perl's XML::Parser
+ const char *const doc = "<!DOCTYPE day [\n"
+ " <!ENTITY draft.day '10'>\n"
+ "]>\n"
+ "<day>&draft.day;</day>\n";
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ XML_SetUserData(parser, parser);
+ XML_SetCharacterDataHandler(parser, on_characters_issue_980);
+
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal=*/XML_TRUE)
+ == XML_STATUS_OK);
+
+ XML_ParserFree(parser);
+}
+END_TEST
+
static void
alloc_setup(void) {
XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free};
@@ -9311,6 +9709,31 @@ START_TEST(test_alloc_internal_entity) {
}
END_TEST
+START_TEST(test_alloc_parameter_entity) {
+ const char *text = "<!DOCTYPE foo ["
+ "<!ENTITY % param1 \"<!ENTITY internal 'some_text'>\">"
+ "%param1;"
+ "]> <foo>&internal;content</foo>";
+ int i;
+ const int alloc_test_max_repeats = 30;
+
+ for (i = 0; i < alloc_test_max_repeats; i++) {
+ allocation_count = i;
+ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ != XML_STATUS_ERROR)
+ break;
+ alloc_teardown();
+ alloc_setup();
+ }
+ allocation_count = -1;
+ if (i == 0)
+ fail("Parameter entity processed despite duff allocator");
+ if (i == alloc_test_max_repeats)
+ fail("Parameter entity not processed at max allocation count");
+}
+END_TEST
+
/* Test the robustness against allocation failure of element handling
* Based on test_dtd_default_handling().
*/
@@ -12955,6 +13378,7 @@ make_suite(void) {
test_wfc_undeclared_entity_with_external_subset_standalone);
tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs);
+ tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
@@ -13137,7 +13561,13 @@ make_suite(void) {
tcase_add_test(tc_basic, test_empty_element_abort);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_pool_integrity_with_unfinished_attr);
+ tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_ref_no_elements);
+ tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_attribute_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_basic,
+ test_deep_nested_entity_delayed_interpretation);
tcase_add_test(tc_basic, test_nested_entity_suspend);
+ tcase_add_test__ifdef_xml_dtd(tc_basic, test_nested_entity_suspend_2);
tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
tcase_add_test(tc_basic, test_set_reparse_deferral);
tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
@@ -13200,6 +13630,8 @@ make_suite(void) {
tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
+ tcase_add_test__ifdef_xml_dtd(tc_misc, test_renter_loop_finite_content);
+ tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980);
suite_add_tcase(s, tc_alloc);
tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown);
@@ -13216,6 +13648,7 @@ make_suite(void) {
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling);
tcase_add_test(tc_alloc, test_alloc_explicit_encoding);
tcase_add_test(tc_alloc, test_alloc_set_base);