commit c0de4903900004dd3ca91f246e5f6489a49a132b Author: Tomas Korbar Date: Mon Mar 24 10:04:33 2025 +0100 Fix CVE-2024-8176 diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c index 8b2af91..d68d2c8 100644 --- a/expat/lib/xmlparse.c +++ b/expat/lib/xmlparse.c @@ -305,6 +305,10 @@ typedef struct { const XML_Char *publicId; const XML_Char *notation; XML_Bool open; + XML_Bool hasMore; /* true if entity has not been completely processed */ + /* An entity can be open while being already completely processed (hasMore == + XML_FALSE). The reason is the delayed closing of entities until their inner + entities are processed and closed */ XML_Bool is_param; XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; @@ -395,6 +399,12 @@ typedef struct { int *scaffIndex; } DTD; +enum EntityType { + ENTITY_INTERNAL, + ENTITY_ATTRIBUTE, + ENTITY_VALUE, +}; + typedef struct open_internal_entity { const char *internalEventPtr; const char *internalEventEndPtr; @@ -402,6 +412,7 @@ typedef struct open_internal_entity { ENTITY *entity; int startTagLevel; XML_Bool betweenDecl; /* WFC: PE Between Declarations */ + enum EntityType type; } OPEN_INTERNAL_ENTITY; enum XML_Account { @@ -461,8 +472,8 @@ static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *next, const char **nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, enum XML_Account account); -static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl); +static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl, enum EntityType type); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, @@ -492,16 +503,21 @@ static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, const char *, STRING_POOL *, enum XML_Account account); -static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, - XML_Bool isCdata, const char *, - const char *, STRING_POOL *, - enum XML_Account account); +static enum XML_Error +appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account, const char **nextPtr); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end, - enum XML_Account account); + enum XML_Account account, + const char **nextPtr); +static enum XML_Error callStoreEntityValue(XML_Parser parser, + const ENCODING *enc, + const char *start, const char *end, + enum XML_Account account); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, @@ -669,6 +685,10 @@ struct XML_ParserStruct { const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; OPEN_INTERNAL_ENTITY *m_freeInternalEntities; + OPEN_INTERNAL_ENTITY *m_openAttributeEntities; + OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; + OPEN_INTERNAL_ENTITY *m_openValueEntities; + OPEN_INTERNAL_ENTITY *m_freeValueEntities; XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; @@ -716,6 +736,7 @@ struct XML_ParserStruct { ACCOUNTING m_accounting; ENTITY_STATS m_entity_stats; #endif + XML_Bool m_reenter; }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) @@ -986,7 +1007,29 @@ callProcessor(XML_Parser parser, const char *start, const char *end, } } g_parseAttempts += 1; - const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); + // Run in a loop to eliminate dangerous recursion depths + enum XML_Error ret; + *endPtr = start; + while (1) { + // Use endPtr as the new start in each iteration, since it will + // be set to the next start point by m_processor. + ret = parser->m_processor(parser, *endPtr, end, endPtr); + + // Make parsing status (and in particular XML_SUSPENDED) take + // precedence over re-enter flag when they disagree + if (parser->m_parsingStatus.parsing != XML_PARSING) { + parser->m_reenter = XML_FALSE; + } + + if (! parser->m_reenter) { + break; + } + + parser->m_reenter = XML_FALSE; + if (ret != XML_ERROR_NONE) + return ret; + } + if (ret == XML_ERROR_NONE) { // if we consumed nothing, remember what we had on this parse attempt. if (*endPtr == start) { @@ -1097,6 +1140,8 @@ parserCreate(const XML_Char *encodingName, parser->m_freeBindingList = NULL; parser->m_freeTagList = NULL; parser->m_freeInternalEntities = NULL; + parser->m_freeAttributeEntities = NULL; + parser->m_freeValueEntities = NULL; parser->m_groupSize = 0; parser->m_groupConnector = NULL; @@ -1199,6 +1244,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; parser->m_openInternalEntities = NULL; + parser->m_openAttributeEntities = NULL; + parser->m_openValueEntities = NULL; parser->m_defaultExpandInternalEntities = XML_TRUE; parser->m_tagLevel = 0; parser->m_tagStack = NULL; @@ -1209,6 +1256,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_unknownEncodingData = NULL; parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; + // Reentry can only be triggered inside m_processor calls + parser->m_reenter = XML_FALSE; #ifdef XML_DTD parser->m_isParamEntity = XML_FALSE; parser->m_useForeignDTD = XML_FALSE; @@ -1268,6 +1317,24 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; } + /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but + * for attributes) */ + openEntityList = parser->m_openAttributeEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = openEntity; + } + /* move m_openValueEntities to m_freeValueEntities (i.e. same task but + * for value entities) */ + openEntityList = parser->m_openValueEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = parser->m_freeValueEntities; + parser->m_freeValueEntities = openEntity; + } moveToFreeBindingList(parser, parser->m_inheritedBindings); FREE(parser, parser->m_unknownEncodingMem); if (parser->m_unknownEncodingRelease) @@ -1281,6 +1348,19 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { return XML_TRUE; } +static XML_Bool +parserBusy(XML_Parser parser) { + switch (parser->m_parsingStatus.parsing) { + case XML_PARSING: + case XML_SUSPENDED: + return XML_TRUE; + case XML_INITIALIZED: + case XML_FINISHED: + default: + return XML_FALSE; + } +} + enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { if (parser == NULL) @@ -1289,8 +1369,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return XML_STATUS_ERROR; /* Get rid of any previous encoding name */ @@ -1527,7 +1606,34 @@ XML_ParserFree(XML_Parser parser) { entityList = entityList->next; FREE(parser, openEntity); } - + /* free m_openAttributeEntities and m_freeAttributeEntities */ + entityList = parser->m_openAttributeEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (parser->m_freeAttributeEntities == NULL) + break; + entityList = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); + } + /* free m_openValueEntities and m_freeValueEntities */ + entityList = parser->m_openValueEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (parser->m_freeValueEntities == NULL) + break; + entityList = parser->m_freeValueEntities; + parser->m_freeValueEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); + } destroyBindings(parser->m_freeBindingList, parser); destroyBindings(parser->m_inheritedBindings, parser); poolDestroy(&parser->m_tempPool); @@ -1569,8 +1675,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; parser->m_useForeignDTD = useDTD; return XML_ERROR_NONE; @@ -1585,8 +1690,7 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { if (parser == NULL) return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return; parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } @@ -1855,8 +1959,7 @@ XML_SetParamEntityParsing(XML_Parser parser, if (parser == NULL) return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return 0; #ifdef XML_DTD parser->m_paramEntityParsing = peParsing; @@ -1873,8 +1976,7 @@ XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser->m_parentParser) return XML_SetHashSalt(parser->m_parentParser, hash_salt); /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return 0; parser->m_hash_secret_salt = hash_salt; return 1; @@ -2188,6 +2290,11 @@ XML_GetBuffer(XML_Parser parser, int len) { return parser->m_bufferEnd; } +static void +triggerReenter(XML_Parser parser) { + parser->m_reenter = XML_TRUE; +} + enum XML_Status XMLCALL XML_StopParser(XML_Parser parser, XML_Bool resumable) { if (parser == NULL) @@ -2659,8 +2766,9 @@ static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent( - parser, 0, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); + parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end, + endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; @@ -2748,6 +2856,11 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default: start = next; } @@ -2921,7 +3034,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, reportDefault(parser, enc, s, next); break; } - result = processInternalEntity(parser, entity, XML_FALSE); + result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL); if (result != XML_ERROR_NONE) return result; } else if (parser->m_externalEntityRefHandler) { @@ -3047,7 +3160,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { - if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter)) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); @@ -3108,7 +3223,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { - if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter)) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); @@ -3241,14 +3358,22 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, break; /* LCOV_EXCL_STOP */ } - *eventPP = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: + *eventPP = next; *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: + *eventPP = next; return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + *nextPtr = next; + return XML_ERROR_NONE; + } + /* Fall through */ default:; + *eventPP = s = next; } } /* not reached */ @@ -4165,14 +4290,21 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, /* LCOV_EXCL_STOP */ } - *eventPP = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: + *eventPP = next; *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: + *eventPP = next; return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default:; + *eventPP = s = next; } } /* not reached */ @@ -4504,7 +4636,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, } /* found end of entity value - can store it now */ return storeEntityValue(parser, parser->m_encoding, s, end, - XML_ACCOUNT_DIRECT); + XML_ACCOUNT_DIRECT, NULL); } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); @@ -4631,7 +4763,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); } start = next; } @@ -5069,9 +5201,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { - enum XML_Error result - = storeEntityValue(parser, enc, s + enc->minBytesPerChar, - next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + enum XML_Error result = callStoreEntityValue( + parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, + XML_ACCOUNT_NONE); if (parser->m_declEntity) { parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen @@ -5467,7 +5599,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, enum XML_Error result; XML_Bool betweenDecl = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); - result = processInternalEntity(parser, entity, betweenDecl); + result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; @@ -5672,6 +5804,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + *nextPtr = next; + return XML_ERROR_NONE; + } + /* Fall through */ default: s = next; tok = XmlPrologTok(enc, s, end, &next); @@ -5739,28 +5877,58 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } - parser->m_eventPtr = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: + parser->m_eventPtr = next; *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: + parser->m_eventPtr = next; return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default:; + parser->m_eventPtr = s = next; } } } static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { - const char *textStart, *textEnd; - const char *next; - enum XML_Error result; - OPEN_INTERNAL_ENTITY *openEntity; +processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, + enum EntityType type) { + OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; + switch (type) { + case ENTITY_INTERNAL: + parser->m_processor = internalEntityProcessor; + openEntityList = &parser->m_openInternalEntities; + freeEntityList = &parser->m_freeInternalEntities; + break; + case ENTITY_ATTRIBUTE: + openEntityList = &parser->m_openAttributeEntities; + freeEntityList = &parser->m_freeAttributeEntities; + break; + case ENTITY_VALUE: + openEntityList = &parser->m_openValueEntities; + freeEntityList = &parser->m_freeValueEntities; + break; + /* default case serves merely as a safety net in case of a + * wrong entityType. Therefore we exclude the following lines + * from the test coverage. + * + * LCOV_EXCL_START + */ + default: + // Should not reach here + assert(0); + /* LCOV_EXCL_STOP */ + } - if (parser->m_freeInternalEntities) { - openEntity = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity->next; + if (*freeEntityList) { + openEntity = *freeEntityList; + *freeEntityList = openEntity->next; } else { openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); @@ -5768,56 +5936,33 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; + entity->hasMore = XML_TRUE; #ifdef XML_DTD entityTrackingOnOpen(parser, entity, __LINE__); #endif entity->processed = 0; - openEntity->next = parser->m_openInternalEntities; - parser->m_openInternalEntities = openEntity; + openEntity->next = *openEntityList; + *openEntityList = openEntity; openEntity->entity = entity; + openEntity->type = type; openEntity->startTagLevel = parser->m_tagLevel; openEntity->betweenDecl = betweenDecl; openEntity->internalEventPtr = NULL; openEntity->internalEventEndPtr = NULL; - textStart = (const char *)entity->textPtr; - textEnd = (const char *)(entity->textPtr + entity->textLen); - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - -#ifdef XML_DTD - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); - } else -#endif /* XML_DTD */ - result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, - textStart, textEnd, &next, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); - - if (result == XML_ERROR_NONE) { - if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - entity->processed = (int)(next - textStart); - parser->m_processor = internalEntityProcessor; - } else { -#ifdef XML_DTD - entityTrackingOnClose(parser, entity, __LINE__); -#endif /* XML_DTD */ - entity->open = XML_FALSE; - parser->m_openInternalEntities = openEntity->next; - /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity; - } + // Only internal entities make use of the reenter flag + // therefore no need to set it for other entity types + if (type == ENTITY_INTERNAL) { + triggerReenter(parser); } - return result; + return XML_ERROR_NONE; } static enum XML_Error PTRCALL internalEntityProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { + UNUSED_P(s); + UNUSED_P(end); + UNUSED_P(nextPtr); ENTITY *entity; const char *textStart, *textEnd; const char *next; @@ -5827,72 +5972,63 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return XML_ERROR_UNEXPECTED_STATE; entity = openEntity->entity; - textStart = ((const char *)entity->textPtr) + entity->processed; - textEnd = (const char *)(entity->textPtr + entity->textLen); - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - -#ifdef XML_DTD - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_TRUE, - XML_ACCOUNT_ENTITY_EXPANSION); - } else -#endif /* XML_DTD */ - result = doContent(parser, openEntity->startTagLevel, - parser->m_internalEncoding, textStart, textEnd, &next, - XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); - - if (result != XML_ERROR_NONE) - return result; - - if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - entity->processed = (int)(next - (const char *)entity->textPtr); + // This will return early + if (entity->hasMore) { + textStart = ((const char *)entity->textPtr) + entity->processed; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; + + if (entity->is_param) { + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else { + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + } + + if (result != XML_ERROR_NONE) + return result; + // Check if entity is complete, if not, mark down how much of it is + // processed + if (textEnd != next + && (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter))) { + entity->processed = (int)(next - (const char *)entity->textPtr); + return result; + } + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openInternalEntities during doProlog or doContent calls above) + entity->hasMore = XML_FALSE; + triggerReenter(parser); return result; - } - + } // End of entity processing, "if" block will return here + // Remove fully processed openEntity from open entity list. #ifdef XML_DTD entityTrackingOnClose(parser, entity, __LINE__); #endif + // openEntity is m_openInternalEntities' head, as we set it at the start of + // this function and we skipped doProlog and doContent calls with hasMore set + // to false. This means we can directly remove the head of + // m_openInternalEntities + assert(parser->m_openInternalEntities == openEntity); entity->open = XML_FALSE; - parser->m_openInternalEntities = openEntity->next; + parser->m_openInternalEntities = parser->m_openInternalEntities->next; /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; - // If there are more open entities we want to stop right here and have the - // upcoming call to XML_ResumeParser continue with entity content, or it would - // be ignored altogether. - if (parser->m_openInternalEntities != NULL - && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - return XML_ERROR_NONE; - } - -#ifdef XML_DTD - if (entity->is_param) { - int tok; - parser->m_processor = prologProcessor; - tok = XmlPrologTok(parser->m_encoding, s, end, &next); - return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, - XML_ACCOUNT_DIRECT); - } else -#endif /* XML_DTD */ - { - parser->m_processor = contentProcessor; - /* see externalEntityContentProcessor vs contentProcessor */ - result = doContent(parser, parser->m_parentParser ? 1 : 0, - parser->m_encoding, s, end, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, - XML_ACCOUNT_DIRECT); - if (result == XML_ERROR_NONE) { - if (! storeRawNames(parser)) - return XML_ERROR_NO_MEMORY; - } - return result; + if (parser->m_openInternalEntities == NULL) { + parser->m_processor = entity->is_param ? prologProcessor : contentProcessor; } + triggerReenter(parser); + return XML_ERROR_NONE; } static enum XML_Error PTRCALL @@ -5908,8 +6044,70 @@ static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, enum XML_Account account) { - enum XML_Error result - = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); + const char *next = ptr; + enum XML_Error result = XML_ERROR_NONE; + + while (1) { + if (! parser->m_openAttributeEntities) { + result = appendAttributeValue(parser, enc, isCdata, next, end, pool, + account, &next); + } else { + OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities; + if (! openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + ENTITY *const entity = openEntity->entity; + const char *const textStart + = ((const char *)entity->textPtr) + entity->processed; + const char *const textEnd + = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + const char *nextInEntity = textStart; + if (entity->hasMore) { + result = appendAttributeValue( + parser, parser->m_internalEncoding, isCdata, textStart, textEnd, + pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity); + if (result != XML_ERROR_NONE) + break; + // Check if entity is complete, if not, mark down how much of it is + // processed. A XML_SUSPENDED check here is not required as + // appendAttributeValue will never suspend the parser. + if (textEnd != nextInEntity) { + entity->processed + = (int)(nextInEntity - (const char *)entity->textPtr); + continue; + } + + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openAttributeEntities during appendAttributeValue) + entity->hasMore = XML_FALSE; + continue; + } // End of entity processing, "if" block skips the rest + + // Remove fully processed openEntity from open entity list. +#if XML_DTD == 1 + entityTrackingOnClose(parser, entity, __LINE__); +#endif + // openEntity is m_openAttributeEntities' head, since we set it at the + // start of this function and because we skipped appendAttributeValue call + // with hasMore set to false. This means we can directly remove the head + // of m_openAttributeEntities + assert(parser->m_openAttributeEntities == openEntity); + entity->open = XML_FALSE; + parser->m_openAttributeEntities = parser->m_openAttributeEntities->next; + + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = openEntity; + } + + // Break if an error occurred or there is nothing left to process + if (result || (parser->m_openAttributeEntities == NULL && end == next)) { + break; + } + } + if (result) return result; if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) @@ -5922,7 +6120,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, - enum XML_Account account) { + enum XML_Account account, const char **nextPtr) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ #ifndef XML_DTD UNUSED_P(account); @@ -5940,6 +6138,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, #endif switch (tok) { case XML_TOK_NONE: + if (nextPtr) { + *nextPtr = next; + } return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == parser->m_encoding) @@ -6080,21 +6281,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = XML_TRUE; -#ifdef XML_DTD - entityTrackingOnOpen(parser, entity, __LINE__); -#endif - result = appendAttributeValue(parser, parser->m_internalEncoding, - isCdata, (const char *)entity->textPtr, - (const char *)textEnd, pool, - XML_ACCOUNT_ENTITY_EXPANSION); -#ifdef XML_DTD - entityTrackingOnClose(parser, entity, __LINE__); -#endif - entity->open = XML_FALSE; - if (result) - return result; + result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); + if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { + *nextPtr = next; + } + return result; } } break; default: @@ -6122,7 +6313,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd, - enum XML_Account account) { + enum XML_Account account, const char **nextPtr) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; @@ -6140,8 +6331,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NO_MEMORY; } + const char *next; for (;;) { - const char *next + next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); @@ -6205,16 +6397,8 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, } else dtd->keepProcessing = dtd->standalone; } else { - entity->open = XML_TRUE; - entityTrackingOnOpen(parser, entity, __LINE__); - result = storeEntityValue( - parser, parser->m_internalEncoding, (const char *)entity->textPtr, - (const char *)(entity->textPtr + entity->textLen), - XML_ACCOUNT_ENTITY_EXPANSION); - entityTrackingOnClose(parser, entity, __LINE__); - entity->open = XML_FALSE; - if (result) - goto endEntityValue; + result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE); + goto endEntityValue; } break; } @@ -6302,9 +6486,84 @@ endEntityValue: #ifdef XML_DTD parser->m_prologState.inEntityValue = oldInEntityValue; #endif /* XML_DTD */ + // If 'nextPtr' is given, it should be updated during the processing + if (nextPtr != NULL) { + *nextPtr = next; + } return result; } +static enum XML_Error +callStoreEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { + const char *next = entityTextPtr; + enum XML_Error result = XML_ERROR_NONE; + while (1) { + if (! parser->m_openValueEntities) { + result + = storeEntityValue(parser, enc, next, entityTextEnd, account, &next); + } else { + OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities; + if (! openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + ENTITY *const entity = openEntity->entity; + const char *const textStart + = ((const char *)entity->textPtr) + entity->processed; + const char *const textEnd + = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + const char *nextInEntity = textStart; + if (entity->hasMore) { + result = storeEntityValue(parser, parser->m_internalEncoding, textStart, + textEnd, XML_ACCOUNT_ENTITY_EXPANSION, + &nextInEntity); + if (result != XML_ERROR_NONE) + break; + // Check if entity is complete, if not, mark down how much of it is + // processed. A XML_SUSPENDED check here is not required as + // appendAttributeValue will never suspend the parser. + if (textEnd != nextInEntity) { + entity->processed + = (int)(nextInEntity - (const char *)entity->textPtr); + continue; + } + + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openValueEntities during storeEntityValue) + entity->hasMore = XML_FALSE; + continue; + } // End of entity processing, "if" block skips the rest + + // Remove fully processed openEntity from open entity list. +# if XML_DTD == 1 + entityTrackingOnClose(parser, entity, __LINE__); +# endif + // openEntity is m_openValueEntities' head, since we set it at the + // start of this function and because we skipped storeEntityValue call + // with hasMore set to false. This means we can directly remove the head + // of m_openValueEntities + assert(parser->m_openValueEntities == openEntity); + entity->open = XML_FALSE; + parser->m_openValueEntities = parser->m_openValueEntities->next; + + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeValueEntities; + parser->m_freeValueEntities = openEntity; + } + + // Break if an error occurred or there is nothing left to process + if (result + || (parser->m_openValueEntities == NULL && entityTextEnd == next)) { + break; + } + } + + return result; +} + static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index 5769aa0..5db384d 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -47,6 +47,8 @@ #endif #include +#include +#include // for SIZE_MAX #include #include #include @@ -1788,6 +1790,89 @@ START_TEST(test_wfc_no_recursive_entity_refs) { } END_TEST +START_TEST(test_no_indirectly_recursive_entity_refs) { + struct TestCase { + const char *doc; + bool usesParameterEntities; + }; + + const struct TestCase cases[] = { + // general entity + character data + {"\n" + " \n" + "]>&e2;\n", + false}, + + // general entity + attribute value + {"\n" + " \n" + "]>\n", + false}, + + // parameter entity + {"\n" + " \n" + " \">\n" + " %define_g;\n" + "]>\n" + "\n", + true}, + }; + const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE}; + + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); + j++) { + const XML_Bool reset_wanted = reset_or_not[j]; + const char *const doc = cases[i].doc; + const bool usesParameterEntities = cases[i].usesParameterEntities; + +#ifdef XML_DTD // both GE and DTD + const bool rejection_expected = true; +#else // neither DTD nor GE + const bool rejection_expected = false; +#endif + + XML_Parser parser = XML_ParserCreate(NULL); + +#ifdef XML_DTD + if (usesParameterEntities) { + assert_true( + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) + == 1); + } +#else + UNUSED_P(usesParameterEntities); +#endif // XML_DTD + + const enum XML_Status status + = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), + /*isFinal*/ XML_TRUE); + + if (rejection_expected) { + assert_true(status == XML_STATUS_ERROR); + assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); + } else { + assert_true(status == XML_STATUS_OK); + } + + if (reset_wanted) { + // This covers free'ing of (eventually) all three open entity lists by + // XML_ParserReset. + XML_ParserReset(parser, NULL); + } + + // This covers free'ing of (eventually) all three open entity lists by + // XML_ParserFree (unless XML_ParserReset has already done that above). + XML_ParserFree(parser); + } + } +} +END_TEST + START_TEST(test_recursive_external_parameter_entity_2) { struct TestCase { const char *doc; @@ -2958,6 +3043,12 @@ typedef struct elementInfo { AttrInfo *attributes; } ElementInfo; + +typedef struct StructParserAndElementInfo { + XML_Parser parser; + ElementInfo *info; +} ParserAndElementInfo; + static void XMLCALL counting_start_element_handler(void *userData, const XML_Char *name, const XML_Char **atts) { @@ -5306,6 +5397,7 @@ END_TEST typedef struct ext_hdlr_data { const char *parse_text; XML_ExternalEntityRefHandler handler; + CharData *storage; } ExtHdlrData; static int XMLCALL @@ -5340,7 +5432,7 @@ START_TEST(test_skipped_null_loaded_ext_entity) { = {"\n" "\n" "%pe2;\n", - external_entity_null_loader}; + external_entity_null_loader, NULL}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); @@ -5358,7 +5450,7 @@ START_TEST(test_skipped_unloaded_ext_entity) { = {"\n" "\n" "%pe2;\n", - NULL}; + NULL, NULL}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); @@ -6448,6 +6540,25 @@ accumulate_entity_decl(void *userData, const XML_Char *entityName, CharData_AppendXMLChars(storage, XCS("\n"), 1); } +typedef struct { + XML_Parser parser; + CharData *storage; +} ParserPlusStorage; + +static void XMLCALL +accumulate_char_data_and_suspend(void *userData, const XML_Char *s, int len) { + ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData; + + CharData_AppendXMLChars(parserPlusStorage->storage, s, len); + + for (int i = 0; i < len; i++) { + if (s[i] == 'Z') { + XML_StopParser(parserPlusStorage->parser, /*resumable=*/XML_TRUE); + break; + } + } +} + START_TEST(test_utf16_pe) { /* '> @@ -6901,11 +7012,6 @@ START_TEST(test_pool_integrity_with_unfinished_attr) { } END_TEST -typedef struct { - XML_Parser parser; - CharData *storage; -} ParserPlusStorage; - static void XMLCALL accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) { ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData; @@ -6913,6 +7019,147 @@ accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) { XML_StopParser(parserPlusStorage->parser, XML_TRUE); } +/* Test a possible early return location in internalEntityProcessor */ +START_TEST(test_entity_ref_no_elements) { + const char *const text = "\n" + "]> &e1;"; // intentionally missing newline + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR); + assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); + XML_ParserFree(parser); +} +END_TEST + +/* Tests if chained entity references lead to unbounded recursion */ +START_TEST(test_deep_nested_entity) { + const size_t N_LINES = 60000; + const size_t SIZE_PER_LINE = 50; + + char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); + if (text == NULL) { + fail("malloc failed"); + } + + char *textPtr = text; + + // Create the XML + textPtr += snprintf(textPtr, SIZE_PER_LINE, + "\n"); + + for (size_t i = 1; i < N_LINES; ++i) { + textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n", + (long unsigned)i, (long unsigned)(i - 1)); + } + + snprintf(textPtr, SIZE_PER_LINE, "]> &s%lu;\n", + (long unsigned)(N_LINES - 1)); + + const XML_Char *const expected = XCS("deepText"); + + CharData storage; + CharData_Init(&storage); + + XML_Parser parser = XML_ParserCreate(NULL); + + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + + if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(parser); + + CharData_CheckXMLChars(&storage, expected); + XML_ParserFree(parser); + free(text); +} +END_TEST + +/* Tests if chained entity references in attributes +lead to unbounded recursion */ +START_TEST(test_deep_nested_attribute_entity) { + const size_t N_LINES = 60000; + const size_t SIZE_PER_LINE = 100; + + char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); + if (text == NULL) { + fail("malloc failed"); + } + + char *textPtr = text; + + // Create the XML + textPtr += snprintf(textPtr, SIZE_PER_LINE, + "\n"); + + for (size_t i = 1; i < N_LINES; ++i) { + textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n", + (long unsigned)i, (long unsigned)(i - 1)); + } + + snprintf(textPtr, SIZE_PER_LINE, "]> mainText\n", + (long unsigned)(N_LINES - 1)); + + AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; + ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}}; + info[0].attributes = doc_info; + + XML_SetStartElementHandler(g_parser, counting_start_element_handler); + XML_SetUserData(g_parser, &info); + + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + + free(text); +} +END_TEST + +START_TEST(test_deep_nested_entity_delayed_interpretation) { + const size_t N_LINES = 70000; + const size_t SIZE_PER_LINE = 100; + + char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); + if (text == NULL) { + fail("malloc failed"); + } + + char *textPtr = text; + + // Create the XML + textPtr += snprintf(textPtr, SIZE_PER_LINE, + "\n"); + + for (size_t i = 1; i < N_LINES; ++i) { + textPtr += snprintf(textPtr, SIZE_PER_LINE, + " \n", (long unsigned)i, + (long unsigned)(i - 1)); + } + + snprintf(textPtr, SIZE_PER_LINE, + " \">\n" + " %%define_g;\n" + "]>\n" + "\n", + (long unsigned)(N_LINES - 1)); + + XML_Parser parser = XML_ParserCreate(NULL); + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(parser); + free(text); +} +END_TEST + START_TEST(test_nested_entity_suspend) { const char *const text = "'>\n" @@ -6943,6 +7190,35 @@ START_TEST(test_nested_entity_suspend) { } END_TEST +START_TEST(test_nested_entity_suspend_2) { + const char *const text = "\n" + " \n" + " \n" + "]>\n" + "&ge3;"; + const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1") + XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3"); + CharData storage; + CharData_Init(&storage); + XML_Parser parser = XML_ParserCreate(NULL); + ParserPlusStorage parserPlusStorage = {parser, &storage}; + + XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend); + XML_SetUserData(parser, &parserPlusStorage); + + enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); + while (status == XML_STATUS_SUSPENDED) { + status = XML_ResumeParser(parser); + } + if (status != XML_STATUS_OK) + xml_failure(parser); + + CharData_CheckXMLChars(&storage, expected); + XML_ParserFree(parser); +} +END_TEST + /* Regression test for quadratic parsing on large tokens */ START_TEST(test_big_tokens_take_linear_time) { const char *const too_slow_failure_message @@ -8349,6 +8625,29 @@ duff_reallocator(void *ptr, size_t size) { return realloc(ptr, size); } +// Portable remake of strndup(3) for C99; does not care about space efficiency +static char * +portable_strndup(const char *s, size_t n) { + if ((s == NULL) || (n == SIZE_MAX)) { + errno = EINVAL; + return NULL; + } + + char *const buffer = (char *)malloc(n + 1); + if (buffer == NULL) { + errno = ENOMEM; + return NULL; + } + + errno = 0; + + memcpy(buffer, s, n); + + buffer[n] = '\0'; + + return buffer; +} + /* Test that a failure to allocate the parser structure fails gracefully */ START_TEST(test_misc_alloc_create_parser) { XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; @@ -8723,7 +9022,7 @@ END_TEST START_TEST(test_misc_stopparser_rejects_unstarted_parser) { const XML_Bool cases[] = {XML_TRUE, XML_FALSE}; for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { - const XML_Bool resumable = cases[i]; + resumable = cases[i]; XML_Parser parser = XML_ParserCreate(NULL); assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE); assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR); @@ -8733,6 +9032,105 @@ START_TEST(test_misc_stopparser_rejects_unstarted_parser) { } END_TEST +/* Adaptation of accumulate_characters that takes ExtHdlrData input to work with + * test_renter_loop_finite_content below */ +static void XMLCALL +accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) { + ExtHdlrData *const test_data = (ExtHdlrData *)userData; + CharData_AppendXMLChars(test_data->storage, s, len); +} + +/* Test that internalEntityProcessor does not re-enter forever; + * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */ +START_TEST(test_renter_loop_finite_content) { + CharData storage; + CharData_Init(&storage); + const char *const text = "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "]>\n" + "&e1;\n"; + ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage}; + const XML_Char *const expected = XCS("(e5)\n"); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); + XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(parser); + + CharData_CheckXMLChars(&storage, expected); + XML_ParserFree(parser); +} +END_TEST + +// Inspired by function XML_OriginalString of Perl's XML::Parser +static char * +dup_original_string(XML_Parser parser) { + const int byte_count = XML_GetCurrentByteCount(parser); + + assert_true(byte_count >= 0); + + int offset = -1; + int size = -1; + + const char *const context = XML_GetInputContext(parser, &offset, &size); + +#if XML_CONTEXT_BYTES > 0 + assert_true(context != NULL); + assert_true(offset >= 0); + assert_true(size >= 0); + return portable_strndup(context + offset, byte_count); +#else + assert_true(context == NULL); + return NULL; +#endif +} + +static void +on_characters_issue_980(void *userData, const XML_Char *s, int len) { + (void)s; + (void)len; + XML_Parser parser = (XML_Parser)userData; + + char *const original_string = dup_original_string(parser); + +#if XML_CONTEXT_BYTES > 0 + assert_true(original_string != NULL); + assert_true(strcmp(original_string, "&draft.day;") == 0); + free(original_string); +#else + assert_true(original_string == NULL); +#endif +} + +START_TEST(test_misc_expected_event_ptr_issue_980) { + // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml" + // from Perl's XML::Parser + const char *const doc = "\n" + "]>\n" + "&draft.day;\n"; + + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetUserData(parser, parser); + XML_SetCharacterDataHandler(parser, on_characters_issue_980); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), + /*isFinal=*/XML_TRUE) + == XML_STATUS_OK); + + XML_ParserFree(parser); +} +END_TEST + static void alloc_setup(void) { XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free}; @@ -9311,6 +9709,31 @@ START_TEST(test_alloc_internal_entity) { } END_TEST +START_TEST(test_alloc_parameter_entity) { + const char *text = "\">" + "%param1;" + "]> &internal;content"; + int i; + const int alloc_test_max_repeats = 30; + + for (i = 0; i < alloc_test_max_repeats; i++) { + allocation_count = i; + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + != XML_STATUS_ERROR) + break; + alloc_teardown(); + alloc_setup(); + } + allocation_count = -1; + if (i == 0) + fail("Parameter entity processed despite duff allocator"); + if (i == alloc_test_max_repeats) + fail("Parameter entity not processed at max allocation count"); +} +END_TEST + /* Test the robustness against allocation failure of element handling * Based on test_dtd_default_handling(). */ @@ -12955,6 +13378,7 @@ make_suite(void) { test_wfc_undeclared_entity_with_external_subset_standalone); tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs); + tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); @@ -13137,7 +13561,13 @@ make_suite(void) { tcase_add_test(tc_basic, test_empty_element_abort); tcase_add_test__ifdef_xml_dtd(tc_basic, test_pool_integrity_with_unfinished_attr); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_ref_no_elements); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_entity); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_attribute_entity); + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_deep_nested_entity_delayed_interpretation); tcase_add_test(tc_basic, test_nested_entity_suspend); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_nested_entity_suspend_2); tcase_add_test(tc_basic, test_big_tokens_take_linear_time); tcase_add_test(tc_basic, test_set_reparse_deferral); tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); @@ -13200,6 +13630,8 @@ make_suite(void) { tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak); tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing); tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser); + tcase_add_test__ifdef_xml_dtd(tc_misc, test_renter_loop_finite_content); + tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980); suite_add_tcase(s, tc_alloc); tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); @@ -13216,6 +13648,7 @@ make_suite(void) { tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity); + tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_parameter_entity); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling); tcase_add_test(tc_alloc, test_alloc_explicit_encoding); tcase_add_test(tc_alloc, test_alloc_set_base);