Rebase to 2.9.12 (#1960623)

Resolves: #1960623
Resolves: #1958784
Resolves: CVE-2021-3541
This commit is contained in:
David King 2021-05-14 14:25:38 +01:00
parent 8431385faa
commit 4c0e536172
16 changed files with 241 additions and 777 deletions

3
.gitignore vendored
View File

@ -39,3 +39,6 @@ libxml2-2.7.7.tar.gz
/libxml2-2.9.8.tar.gz
/libxml2-2.9.9.tar.gz
/libxml2-2.9.10.tar.gz
/libxml2-2.9.12.tar.gz
/gpgkey-DB46681BB91ADCEA170FA2D415588B26596BEA5D.gpg
/libxml2-2.9.12.tar.gz.asc

View File

@ -1,33 +0,0 @@
From 0815302dee2b78139832c2080348086a0564836b Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 6 Dec 2019 12:27:29 +0100
Subject: [PATCH] Fix freeing of nested documents
Apparently, some libxslt RVTs can contain nested document nodes, see
issue #132. I'm not sure how this happens exactly but it can cause a
segfault in xmlFreeNodeList after the changes in commit 0762c9b6.
Make sure not to touch the (nonexistent) `content` member of xmlDocs.
---
tree.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/tree.c b/tree.c
index 070670f1..0d7fc98c 100644
--- a/tree.c
+++ b/tree.c
@@ -3708,6 +3708,11 @@ xmlFreeNodeList(xmlNodePtr cur) {
(cur->type != XML_XINCLUDE_START) &&
(cur->type != XML_XINCLUDE_END) &&
(cur->type != XML_ENTITY_REF_NODE) &&
+ (cur->type != XML_DOCUMENT_NODE) &&
+#ifdef LIBXML_DOCB_ENABLED
+ (cur->type != XML_DOCB_DOCUMENT_NODE) &&
+#endif
+ (cur->type != XML_HTML_DOCUMENT_NODE) &&
(cur->content != (xmlChar *) &(cur->properties))) {
DICT_FREE(cur->content)
}
--
2.22.0

View File

@ -1,33 +0,0 @@
From 6088a74bcf7d0c42e24cff4594d804e1d3c9fbca Mon Sep 17 00:00:00 2001
From: Zhipeng Xie <xiezhipeng1@huawei.com>
Date: Tue, 20 Aug 2019 16:33:06 +0800
Subject: [PATCH] Fix memory leak in xmlSchemaValidateStream
When ctxt->schema is NULL, xmlSchemaSAXPlug->xmlSchemaPreRun
allocates a new schema for ctxt->schema and sets vctxt->xsiAssemble
to 1. Then xmlSchemaVStart->xmlSchemaPreRun resets
vctxt->xsiAssemble to 0, which means the allocated schema
can no longer be freed.
Found with libFuzzer.
Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
---
xmlschemas.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/xmlschemas.c b/xmlschemas.c
index 301c8449..39d92182 100644
--- a/xmlschemas.c
+++ b/xmlschemas.c
@@ -28090,7 +28090,6 @@ xmlSchemaPreRun(xmlSchemaValidCtxtPtr vctxt) {
vctxt->nberrors = 0;
vctxt->depth = -1;
vctxt->skipDepth = -1;
- vctxt->xsiAssemble = 0;
vctxt->hasKeyrefs = 0;
#ifdef ENABLE_IDC_NODE_TABLES_TEST
vctxt->createIDCNodeTables = 1;
--
2.24.1

View File

@ -1,36 +0,0 @@
From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 7 Aug 2020 21:54:27 +0200
Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'
Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
array access.
Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
the report.
Fixes #178.
---
xmllint.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/xmllint.c b/xmllint.c
index f6a8e463..c647486f 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -528,6 +528,12 @@ static void
xmlHTMLEncodeSend(void) {
char *result;
+ /*
+ * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+ * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+ * an out-of-bounds read.
+ */
+ memset(&buffer[sizeof(buffer)-4], 0, 4);
result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
if (result) {
xmlGenericError(xmlGenericErrorContext, "%s", result);
--
2.28.0.rc2

View File

@ -1,32 +0,0 @@
From 0e1a49c8907645d2e155f0d89d4d9895ac5112b5 Mon Sep 17 00:00:00 2001
From: Zhipeng Xie <xiezhipeng1@huawei.com>
Date: Thu, 12 Dec 2019 17:30:55 +0800
Subject: [PATCH] Fix infinite loop in xmlStringLenDecodeEntities
When ctxt->instate == XML_PARSER_EOF, xmlParseStringEntityRef
returns NULL, which causes an infinite loop in xmlStringLenDecodeEntities.
Found with libFuzzer.
Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
---
parser.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/parser.c b/parser.c
index d1c31963..a34bb6cd 100644
--- a/parser.c
+++ b/parser.c
@@ -2646,7 +2646,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
else
c = 0;
while ((c != 0) && (c != end) && /* non input consuming loop */
- (c != end2) && (c != end3)) {
+ (c != end2) && (c != end3) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if (c == 0) break;
if ((c == '&') && (str[1] == '#')) {
--
2.24.1

View File

@ -1,31 +0,0 @@
From 1358d157d0bd83be1dfe356a69213df9fac0b539 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Wed, 21 Apr 2021 13:23:27 +0200
Subject: [PATCH] Fix use-after-free with `xmllint --html --push`
Call htmlCtxtUseOptions to make sure that names aren't stored in
dictionaries.
Note that this issue only affects xmllint using the HTML push parser.
Fixes #230.
---
xmllint.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xmllint.c b/xmllint.c
index 6ca1bf54..dbef273a 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -2213,7 +2213,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
if (res > 0) {
ctxt = htmlCreatePushParserCtxt(NULL, NULL,
chars, res, filename, XML_CHAR_ENCODING_NONE);
- xmlCtxtUseOptions(ctxt, options);
+ htmlCtxtUseOptions(ctxt, options);
while ((res = fread(chars, 1, pushsize, f)) > 0) {
htmlParseChunk(ctxt, chars, res, 0);
}
--
GitLab

View File

@ -1,49 +0,0 @@
From bf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2 Mon Sep 17 00:00:00 2001
From: Joel Hockey <joel.hockey@gmail.com>
Date: Sun, 16 Aug 2020 17:19:35 -0700
Subject: [PATCH] Validate UTF8 in xmlEncodeEntities
Code is currently assuming UTF-8 without validating. Truncated UTF-8
input can cause out-of-bounds array access.
Adds further checks to partial fix in 50f06b3e.
Fixes #178
---
entities.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/entities.c b/entities.c
index 37b99a56..1a8f86f0 100644
--- a/entities.c
+++ b/entities.c
@@ -704,11 +704,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
} else {
/*
* We assume we have UTF-8 input.
+ * It must match either:
+ * 110xxxxx 10xxxxxx
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * That is:
+ * cur[0] is 11xxxxxx
+ * cur[1] is 10xxxxxx
+ * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
+ * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
+ * cur[0] is not 11111xxx
*/
char buf[11], *ptr;
int val = 0, l = 1;
- if (*cur < 0xC0) {
+ if (((cur[0] & 0xC0) != 0xC0) ||
+ ((cur[1] & 0xC0) != 0x80) ||
+ (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
+ (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
+ (((cur[0] & 0xF8) == 0xF8))) {
xmlEntitiesErr(XML_CHECK_NOT_UTF8,
"xmlEncodeEntities: input not UTF-8");
if (doc != NULL)
--
GitLab

View File

@ -1,247 +0,0 @@
From 752e5f71d7cea2ca5a7e7c0b8f72ed04ce654be4 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Wed, 10 Jun 2020 16:34:52 +0200
Subject: [PATCH 1/2] Don't recurse into xi:include children in
xmlXIncludeDoProcess
Otherwise, nested xi:include nodes might result in a use-after-free
if XML_PARSE_NOXINCNODE is specified.
Found with libFuzzer and ASan.
---
result/XInclude/fallback3.xml | 8 ++++++++
result/XInclude/fallback3.xml.err | 0
result/XInclude/fallback3.xml.rdr | 25 +++++++++++++++++++++++++
result/XInclude/fallback4.xml | 10 ++++++++++
result/XInclude/fallback4.xml.err | 0
result/XInclude/fallback4.xml.rdr | 29 +++++++++++++++++++++++++++++
test/XInclude/docs/fallback3.xml | 9 +++++++++
test/XInclude/docs/fallback4.xml | 7 +++++++
xinclude.c | 24 ++++++++++--------------
9 files changed, 98 insertions(+), 14 deletions(-)
create mode 100644 result/XInclude/fallback3.xml
create mode 100644 result/XInclude/fallback3.xml.err
create mode 100644 result/XInclude/fallback3.xml.rdr
create mode 100644 result/XInclude/fallback4.xml
create mode 100644 result/XInclude/fallback4.xml.err
create mode 100644 result/XInclude/fallback4.xml.rdr
create mode 100644 test/XInclude/docs/fallback3.xml
create mode 100644 test/XInclude/docs/fallback4.xml
diff --git a/result/XInclude/fallback3.xml b/result/XInclude/fallback3.xml
new file mode 100644
index 00000000..b4235514
--- /dev/null
+++ b/result/XInclude/fallback3.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<a>
+ <doc xml:base="../ents/something.xml">
+<p>something</p>
+<p>really</p>
+<p>simple</p>
+</doc>
+</a>
diff --git a/result/XInclude/fallback3.xml.err b/result/XInclude/fallback3.xml.err
new file mode 100644
index 00000000..e69de29b
diff --git a/result/XInclude/fallback3.xml.rdr b/result/XInclude/fallback3.xml.rdr
new file mode 100644
index 00000000..aa2f1374
--- /dev/null
+++ b/result/XInclude/fallback3.xml.rdr
@@ -0,0 +1,25 @@
+0 1 a 0 0
+1 14 #text 0 1
+
+1 1 doc 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 something
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 really
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 simple
+2 15 p 0 0
+2 14 #text 0 1
+
+1 15 doc 0 0
+1 14 #text 0 1
+
+0 15 a 0 0
diff --git a/result/XInclude/fallback4.xml b/result/XInclude/fallback4.xml
new file mode 100644
index 00000000..9883fd54
--- /dev/null
+++ b/result/XInclude/fallback4.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<a>
+
+ <doc xml:base="../ents/something.xml">
+<p>something</p>
+<p>really</p>
+<p>simple</p>
+</doc>
+
+</a>
diff --git a/result/XInclude/fallback4.xml.err b/result/XInclude/fallback4.xml.err
new file mode 100644
index 00000000..e69de29b
diff --git a/result/XInclude/fallback4.xml.rdr b/result/XInclude/fallback4.xml.rdr
new file mode 100644
index 00000000..628b9513
--- /dev/null
+++ b/result/XInclude/fallback4.xml.rdr
@@ -0,0 +1,29 @@
+0 1 a 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 doc 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 something
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 really
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 simple
+2 15 p 0 0
+2 14 #text 0 1
+
+1 15 doc 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+0 15 a 0 0
diff --git a/test/XInclude/docs/fallback3.xml b/test/XInclude/docs/fallback3.xml
new file mode 100644
index 00000000..0c8b6c9e
--- /dev/null
+++ b/test/XInclude/docs/fallback3.xml
@@ -0,0 +1,9 @@
+<a>
+ <xi:include href="../ents/something.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:fallback>
+ <xi:include href="c.xml">
+ <xi:fallback>There is no c.xml ... </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+</a>
diff --git a/test/XInclude/docs/fallback4.xml b/test/XInclude/docs/fallback4.xml
new file mode 100644
index 00000000..b500a635
--- /dev/null
+++ b/test/XInclude/docs/fallback4.xml
@@ -0,0 +1,7 @@
+<a>
+ <xi:include href="c.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:fallback>
+ <xi:include href="../ents/something.xml"/>
+ </xi:fallback>
+ </xi:include>
+</a>
diff --git a/xinclude.c b/xinclude.c
index ba850fa5..f260c1a7 100644
--- a/xinclude.c
+++ b/xinclude.c
@@ -2392,21 +2392,19 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
* First phase: lookup the elements in the document
*/
cur = tree;
- if (xmlXIncludeTestNode(ctxt, cur) == 1)
- xmlXIncludePreProcessNode(ctxt, cur);
while ((cur != NULL) && (cur != tree->parent)) {
/* TODO: need to work on entities -> stack */
- if ((cur->children != NULL) &&
- (cur->children->type != XML_ENTITY_DECL) &&
- (cur->children->type != XML_XINCLUDE_START) &&
- (cur->children->type != XML_XINCLUDE_END)) {
- cur = cur->children;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
- } else if (cur->next != NULL) {
+ if (xmlXIncludeTestNode(ctxt, cur) == 1) {
+ xmlXIncludePreProcessNode(ctxt, cur);
+ } else if ((cur->children != NULL) &&
+ (cur->children->type != XML_ENTITY_DECL) &&
+ (cur->children->type != XML_XINCLUDE_START) &&
+ (cur->children->type != XML_XINCLUDE_END)) {
+ cur = cur->children;
+ continue;
+ }
+ if (cur->next != NULL) {
cur = cur->next;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
} else {
if (cur == tree)
break;
@@ -2416,8 +2414,6 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
break; /* do */
if (cur->next != NULL) {
cur = cur->next;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
break; /* do */
}
} while (cur != NULL);
--
2.31.1
From 49cc4182543dba73216add4021994a81678763bd Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Thu, 22 Apr 2021 19:26:28 +0200
Subject: [PATCH 2/2] Fix use-after-free with `xmllint --xinclude --dropdtd`
The --dropdtd option can leave dangling pointers in entity reference
nodes. Make sure to skip these nodes when processing XIncludes.
This also avoids scanning entity declarations and even modifying
them inadvertently during XInclude processing.
Move from a block list to an allow list approach to avoid descending
into other node types that can't contain elements.
Fixes #237.
---
xinclude.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/xinclude.c b/xinclude.c
index f260c1a7..d7648529 100644
--- a/xinclude.c
+++ b/xinclude.c
@@ -2397,9 +2397,8 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
if (xmlXIncludeTestNode(ctxt, cur) == 1) {
xmlXIncludePreProcessNode(ctxt, cur);
} else if ((cur->children != NULL) &&
- (cur->children->type != XML_ENTITY_DECL) &&
- (cur->children->type != XML_XINCLUDE_START) &&
- (cur->children->type != XML_XINCLUDE_END)) {
+ ((cur->type == XML_DOCUMENT_NODE) ||
+ (cur->type == XML_ELEMENT_NODE))) {
cur = cur->children;
continue;
}
--
2.31.1

View File

@ -1,44 +0,0 @@
From babe75030c7f64a37826bb3342317134568bef61 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sat, 1 May 2021 16:53:33 +0200
Subject: [PATCH] Propagate error in xmlParseElementChildrenContentDeclPriv
Check return value of recursive calls to
xmlParseElementChildrenContentDeclPriv and return immediately in case
of errors. Otherwise, struct xmlElementContent could contain unexpected
null pointers, leading to a null deref when post-validating documents
which aren't well-formed and parsed in recovery mode.
Fixes #243.
---
parser.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/parser.c b/parser.c
index b42e6043..73c27edd 100644
--- a/parser.c
+++ b/parser.c
@@ -6208,6 +6208,8 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
SKIP_BLANKS;
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
depth + 1);
+ if (cur == NULL)
+ return(NULL);
SKIP_BLANKS;
GROW;
} else {
@@ -6341,6 +6343,11 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
SKIP_BLANKS;
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
depth + 1);
+ if (last == NULL) {
+ if (ret != NULL)
+ xmlFreeDocElementContent(ctxt->myDoc, ret);
+ return(NULL);
+ }
SKIP_BLANKS;
} else {
elem = xmlParseName(ctxt);
--
GitLab

View File

@ -1,41 +0,0 @@
From 8e7c20a1af8776677d7890f30b7a180567701a49 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 3 Aug 2020 17:30:41 +0200
Subject: [PATCH] Fix integer overflow when comparing schema dates
Found by OSS-Fuzz.
---
xmlschemastypes.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/xmlschemastypes.c b/xmlschemastypes.c
index 4249d700..d6b9f924 100644
--- a/xmlschemastypes.c
+++ b/xmlschemastypes.c
@@ -3691,6 +3691,8 @@ xmlSchemaCompareDurations(xmlSchemaValPtr x, xmlSchemaValPtr y)
minday = 0;
maxday = 0;
} else {
+ if (myear > LONG_MAX / 366)
+ return -2;
/* FIXME: This doesn't take leap year exceptions every 100/400 years
into account. */
maxday = 365 * myear + (myear + 3) / 4;
@@ -4079,6 +4081,14 @@ xmlSchemaCompareDates (xmlSchemaValPtr x, xmlSchemaValPtr y)
if ((x == NULL) || (y == NULL))
return -2;
+ if ((x->value.date.year > LONG_MAX / 366) ||
+ (x->value.date.year < LONG_MIN / 366) ||
+ (y->value.date.year > LONG_MAX / 366) ||
+ (y->value.date.year < LONG_MIN / 366)) {
+ /* Possible overflow when converting to days. */
+ return -2;
+ }
+
if (x->value.date.tz_flag) {
if (!y->value.date.tz_flag) {
--
2.28.0.rc2

View File

@ -1,92 +0,0 @@
From edc7b6abb0c125eeb888748c334897f60aab0854 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
Date: Fri, 28 Feb 2020 12:48:14 +0100
Subject: [PATCH] Parenthesize Py<type>_Check() in ifs
In C, if expressions should be parenthesized.
PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized
expression before, but that's not API to rely on.
Since Python 3.9.0a4 it needs to be parenthesized explicitly.
Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149
---
python/libxml.c | 4 ++--
python/types.c | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/python/libxml.c b/python/libxml.c
index bc676c4e..81e709f3 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
diff --git a/python/types.c b/python/types.c
index c2bafeb1..ed284ec7 100644
--- a/python/types.c
+++ b/python/types.c
@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
if (obj == NULL) {
return (NULL);
}
- if PyFloat_Check (obj) {
+ if (PyFloat_Check (obj)) {
ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj));
- } else if PyLong_Check(obj) {
+ } else if (PyLong_Check(obj)) {
#ifdef PyLong_AS_LONG
ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj));
#else
ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj));
#endif
#ifdef PyBool_Check
- } else if PyBool_Check (obj) {
+ } else if (PyBool_Check (obj)) {
if (obj == Py_True) {
ret = xmlXPathNewBoolean(1);
@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathNewBoolean(0);
}
#endif
- } else if PyBytes_Check (obj) {
+ } else if (PyBytes_Check (obj)) {
xmlChar *str;
str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj),
PyBytes_GET_SIZE(obj));
ret = xmlXPathWrapString(str);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (obj) {
+ } else if (PyUnicode_Check (obj)) {
#if PY_VERSION_HEX >= 0x03030000
xmlChar *str;
const char *tmp;
@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathWrapString(str);
#endif
#endif
- } else if PyList_Check (obj) {
+ } else if (PyList_Check (obj)) {
int i;
PyObject *node;
xmlNodePtr cur;
--
2.24.1

View File

@ -0,0 +1,211 @@
From 85b1792e37b131e7a51af98a37f92472e8de5f3f Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Tue, 18 May 2021 20:08:28 +0200
Subject: [PATCH] Work around lxml API abuse
Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted
parent pointers. This used to work with the old recursive code but the
non-recursive rewrite required parent pointers to be set correctly.
Unfortunately, lxml relies on the old behavior and passes subtrees with
a corrupted structure. Fall back to a recursive function call if an
invalid parent pointer is detected.
Fixes #255.
---
HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------
xmlsave.c | 31 +++++++++++++++++++++----------
2 files changed, 49 insertions(+), 28 deletions(-)
diff --git a/HTMLtree.c b/HTMLtree.c
index 24434d45..bdd639c7 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -744,7 +744,7 @@ void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
int format) {
- xmlNodePtr root;
+ xmlNodePtr root, parent;
xmlAttrPtr attr;
const htmlElemDesc * info;
@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
}
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_HTML_DOCUMENT_NODE:
@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
break;
case XML_ELEMENT_NODE:
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
+ break;
+ }
+
/*
* Get specific HTML info for that node.
*/
@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
+ parent = cur;
cur = cur->children;
continue;
}
@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(info != NULL) && (!info->isinline)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
if (((cur->name == (const xmlChar *)xmlStringText) ||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
- ((cur->parent == NULL) ||
- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
+ ((parent == NULL) ||
+ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
+ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
(cur->type == XML_DOCUMENT_NODE)) {
@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
}
diff --git a/xmlsave.c b/xmlsave.c
index 61a40459..aedbd5e7 100644
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
static void
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
int format = ctxt->format;
- xmlNodePtr tmp, root, unformattedNode = NULL;
+ xmlNodePtr tmp, root, unformattedNode = NULL, parent;
xmlAttrPtr attr;
xmlChar *start, *end;
xmlOutputBufferPtr buf;
@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
buf = ctxt->buf;
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_DOCUMENT_NODE:
@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_DOCUMENT_FRAG_NODE:
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_ELEMENT_NODE:
- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ xmlNodeDumpOutputInternal(ctxt, cur);
+ break;
+ }
+
+ if ((ctxt->level > 0) && (ctxt->format == 1) &&
+ (xmlIndentTreeOutput))
xmlOutputBufferWrite(buf, ctxt->indent_size *
(ctxt->level > ctxt->indent_nr ?
ctxt->indent_nr : ctxt->level),
@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
xmlOutputBufferWrite(buf, 1, ">");
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
if (ctxt->level >= 0) ctxt->level++;
+ parent = cur;
cur = cur->children;
continue;
}
@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if (cur->type == XML_ELEMENT_NODE) {
if (ctxt->level > 0) ctxt->level--;
--
GitLab

View File

@ -6,9 +6,9 @@
exec_prefix=@exec_prefix@
includedir=@includedir@
! libdir=@libdir@
cflags=
libs=
usage()
{
--- 3,14 ----
prefix=@prefix@
exec_prefix=@exec_prefix@
@ -19,6 +19,6 @@
! else
! libdir=${exec_prefix}/lib64
! fi
cflags=
libs=
usage()
{

View File

@ -1,39 +1,26 @@
Name: libxml2
Version: 2.9.10
Release: 12%{?dist}
Version: 2.9.12
Release: 1%{?dist}
Summary: Library providing XML and HTML support
License: MIT
URL: http://xmlsoft.org/
Source: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz
Source0: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz
Source1: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz.asc
# gpg --keyserver keys.gnupg.net --recv-keys DB46681BB91ADCEA170FA2D415588B26596BEA5D
# gpg2 --export --export-options export-minimal DB46681BB91ADCEA170FA2D415588B26596BEA5D > gpgkey-DB46681BB91ADCEA170FA2D415588B26596BEA5D.gpg
Source2: gpgkey-DB46681BB91ADCEA170FA2D415588B26596BEA5D.gpg
Patch0: libxml2-multilib.patch
# Patch from openSUSE.
# See: https://bugzilla.gnome.org/show_bug.cgi?id=789714
Patch1: libxml2-2.9.8-python3-unicode-errors.patch
Patch2: https://gitlab.gnome.org/GNOME/libxml2/commit/0815302dee2b78139832c2080348086a0564836b.patch#/fix-relaxed-approach-to-nested-documents.patch
# https://gitlab.gnome.org/GNOME/libxml2/merge_requests/68
Patch3: libxml2-2.9.10-CVE-2019-20388.patch
# https://gitlab.gnome.org/GNOME/libxml2/merge_requests/63
Patch4: libxml2-2.9.10-CVE-2020-7595.patch
# https://gitlab.gnome.org/GNOME/libxml2/merge_requests/71
Patch5: libxml2-2.9.10-parenthesize-type-checks.patch
Patch6: libxml2-2.9.10-fix-integer-overflow.patch
# https://bugzilla.redhat.com/show_bug.cgi?id=1877788
Patch7: libxml2-2.9.10-CVE-2020-24977.patch
# https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/87
Patch8: python-py_ssize_t.patch
# https://bugzilla.redhat.com/show_bug.cgi?id=1956969
Patch9: libxml2-2.9.10-CVE-2021-3516.patch
# https://bugzilla.redhat.com/show_bug.cgi?id=1957002
Patch10: libxml2-2.9.10-CVE-2021-3517.patch
# https://bugzilla.redhat.com/show_bug.cgi?id=1957029
Patch11: libxml2-2.9.10-CVE-2021-3518.patch
# https://bugzilla.redhat.com/show_bug.cgi?id=1957285
Patch12: libxml2-2.9.10-CVE-2021-3537.patch
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
Patch2: libxml2-2.9.12-fix-lxml-corrupted-tree.patch
BuildRequires: gcc
BuildRequires: make
BuildRequires: cmake-rpm-macros
BuildRequires: gcc
BuildRequires: gnupg2
BuildRequires: make
BuildRequires: pkgconfig(zlib)
BuildRequires: pkgconfig(liblzma)
@ -91,6 +78,7 @@ this includes parsing and validation even with complex DTDs, either
at parse time or later once the document has been modified.
%prep
%{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}'
%autosetup -p1
find doc -type f -executable -print -exec chmod 0644 {} ';'
@ -98,14 +86,11 @@ find doc -type f -executable -print -exec chmod 0644 {} ';'
rm python/{libxml2-py.c,libxml2-py.h,libxml2-export.c}
%build
mkdir py3
%global _configure ../configure
%global _configure_disable_silent_rules 1
( cd py3 && %configure --cache-file=../config.cache --with-python=%{__python3} )
%make_build -C py3
%configure --with-python=%{__python3}
%make_build
%install
%make_install -C py3
%make_install
# multiarch craziness on timestamp differences or Makefile/binaries for examples
touch -m --reference=%{buildroot}%{_includedir}/libxml2/libxml/parser.h %{buildroot}%{_bindir}/xml2-config
@ -117,7 +102,7 @@ rm -vrf %{buildroot}%{_datadir}/doc/
gzip -9 -c doc/libxml2-api.xml > doc/libxml2-api.xml.gz
%check
%make_build runtests -C py3
%make_build runtests
%ldconfig_scriptlets
@ -161,6 +146,9 @@ gzip -9 -c doc/libxml2-api.xml > doc/libxml2-api.xml.gz
%{python3_sitearch}/libxml2mod.so
%changelog
* Fri May 14 2021 David King <dking@redhat.com> - 2.9.12-1
- Rebase to 2.9.12 (#1960623)
* Thu May 13 2021 David King <dking@redhat.com> - 2.9.10-12
- Fix CVE-2021-3516 (#1956969)
- Fix CVE-2021-3517 (#1957002)

View File

@ -1,102 +0,0 @@
From 43e946dd497cc6ff0067b8a8f85c620376dfd4cd Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 9 Nov 2020 18:19:31 +0100
Subject: [PATCH 1/2] Build the Python extension with PY_SSIZE_T_CLEAN
The Python extension module now uses Py_ssize_t rather than int for
string lengths. This change makes the extension compatible with
Python 3.10.
Fixes #203.
---
python/generator.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
From b3db67629465823f042a5f3303ecdf8e4bd09a76 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 10 Nov 2020 15:42:36 +0100
Subject: [PATCH 2/2] Convert python/libxml.c to PY_SSIZE_T_CLEAN
Define PY_SSIZE_T_CLEAN macro in python/libxml.c and cast the string
length (int len) explicitly to Py_ssize_t when passing a string to a
function call using PyObject_CallMethod() with the "s#" format.
---
python/libxml.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/python/generator.py b/python/generator.py
index c0cb3add..59d45e00 100755
--- a/python/generator.py
+++ b/python/generator.py
@@ -393,7 +393,7 @@ def print_function_wrapper(name, output, export, include):
format_args = format_args + ", &%s" % (arg[0])
if f == 's#':
format_args = format_args + ", &py_buffsize%d" % num_bufs
- c_args = c_args + " int py_buffsize%d;\n" % num_bufs
+ c_args = c_args + " Py_ssize_t py_buffsize%d;\n" % num_bufs
num_bufs = num_bufs + 1
if c_call != "":
c_call = c_call + ", "
@@ -555,6 +555,7 @@ def buildStubs():
export.write("/* Generated */\n\n")
wrapper = open("libxml2-py.c", "w")
wrapper.write("/* Generated */\n\n")
+ wrapper.write("#define PY_SSIZE_T_CLEAN\n")
wrapper.write("#include <Python.h>\n")
wrapper.write("#include <libxml/xmlversion.h>\n")
wrapper.write("#include <libxml/tree.h>\n")
diff --git a/python/libxml.c b/python/libxml.c
index 81e709f3..3b66bd61 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -11,6 +11,7 @@
*
* daniel@veillard.com
*/
+#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <fileobject.h>
/* #include "config.h" */
@@ -1048,10 +1049,10 @@ pythonCharacters(void *user_data, const xmlChar * ch, int len)
if (type != 0) {
if (type == 1)
result = PyObject_CallMethod(handler, (char *) "characters",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
else if (type == 2)
result = PyObject_CallMethod(handler, (char *) "data",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
if (PyErr_Occurred())
PyErr_Print();
Py_XDECREF(result);
@@ -1078,11 +1079,11 @@ pythonIgnorableWhitespace(void *user_data, const xmlChar * ch, int len)
result =
PyObject_CallMethod(handler,
(char *) "ignorableWhitespace",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
else if (type == 2)
result =
PyObject_CallMethod(handler, (char *) "data",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
Py_XDECREF(result);
}
}
@@ -1223,11 +1224,11 @@ pythonCdataBlock(void *user_data, const xmlChar * ch, int len)
if (type == 1)
result =
PyObject_CallMethod(handler, (char *) "cdataBlock",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
else if (type == 2)
result =
PyObject_CallMethod(handler, (char *) "cdata",
- (char *) "s#", ch, len);
+ (char *) "s#", ch, (Py_ssize_t)len);
if (PyErr_Occurred())
PyErr_Print();
Py_XDECREF(result);
--
2.26.2

View File

@ -1 +1,3 @@
SHA512 (libxml2-2.9.10.tar.gz) = 0adfd12bfde89cbd6296ba6e66b6bed4edb814a74b4265bda34d95c41d9d92c696ee7adb0c737aaf9cc6e10426a31a35079b2a23d26c074e299858da12c072ed
SHA512 (libxml2-2.9.12.tar.gz) = df1c6486e80f0fcf3c506f3599bcfb94b620c00d0b5d26831bc983daa78d58ec58b5057b1ec7c1a26c694f40199c6234ee2a6dcabf65abfa10c447cb5705abbd
SHA512 (libxml2-2.9.12.tar.gz.asc) = 69ca6ab7170cad467724e19eff99a3544966a26069e78a7b7cc27ae93a9077b11cc8dad2536bd0b27c3b45f4ea7520c813fe5a018cd65f103059f7f75147a656
SHA512 (gpgkey-DB46681BB91ADCEA170FA2D415588B26596BEA5D.gpg) = b98316fb5298c7b217028037e3a2a6d9440acdf244236380012159ada705b75a65530fc165cca647f50764db06b608efdb4851c9847cdf52fd1fd018ac822c5b