Fix python-lxml regression with 2.9.12
This commit is contained in:
parent
9d73c43a50
commit
8ce605432d
211
libxml2-2.9.12-fix-lxml-corrupted-tree.patch
Normal file
211
libxml2-2.9.12-fix-lxml-corrupted-tree.patch
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
From 85b1792e37b131e7a51af98a37f92472e8de5f3f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
||||||
|
Date: Tue, 18 May 2021 20:08:28 +0200
|
||||||
|
Subject: [PATCH] Work around lxml API abuse
|
||||||
|
|
||||||
|
Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted
|
||||||
|
parent pointers. This used to work with the old recursive code but the
|
||||||
|
non-recursive rewrite required parent pointers to be set correctly.
|
||||||
|
|
||||||
|
Unfortunately, lxml relies on the old behavior and passes subtrees with
|
||||||
|
a corrupted structure. Fall back to a recursive function call if an
|
||||||
|
invalid parent pointer is detected.
|
||||||
|
|
||||||
|
Fixes #255.
|
||||||
|
---
|
||||||
|
HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------
|
||||||
|
xmlsave.c | 31 +++++++++++++++++++++----------
|
||||||
|
2 files changed, 49 insertions(+), 28 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/HTMLtree.c b/HTMLtree.c
|
||||||
|
index 24434d45..bdd639c7 100644
|
||||||
|
--- a/HTMLtree.c
|
||||||
|
+++ b/HTMLtree.c
|
||||||
|
@@ -744,7 +744,7 @@ void
|
||||||
|
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
|
||||||
|
int format) {
|
||||||
|
- xmlNodePtr root;
|
||||||
|
+ xmlNodePtr root, parent;
|
||||||
|
xmlAttrPtr attr;
|
||||||
|
const htmlElemDesc * info;
|
||||||
|
|
||||||
|
@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
}
|
||||||
|
|
||||||
|
root = cur;
|
||||||
|
+ parent = cur->parent;
|
||||||
|
while (1) {
|
||||||
|
switch (cur->type) {
|
||||||
|
case XML_HTML_DOCUMENT_NODE:
|
||||||
|
@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
||||||
|
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
||||||
|
}
|
||||||
|
- if (cur->children != NULL) {
|
||||||
|
+ /* Always validate cur->parent when descending. */
|
||||||
|
+ if ((cur->parent == parent) && (cur->children != NULL)) {
|
||||||
|
+ parent = cur;
|
||||||
|
cur = cur->children;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XML_ELEMENT_NODE:
|
||||||
|
+ /*
|
||||||
|
+ * Some users like lxml are known to pass nodes with a corrupted
|
||||||
|
+ * tree structure. Fall back to a recursive call to handle this
|
||||||
|
+ * case.
|
||||||
|
+ */
|
||||||
|
+ if ((cur->parent != parent) && (cur->children != NULL)) {
|
||||||
|
+ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Get specific HTML info for that node.
|
||||||
|
*/
|
||||||
|
@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
(cur->name != NULL) &&
|
||||||
|
(cur->name[0] != 'p')) /* p, pre, param */
|
||||||
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
|
+ parent = cur;
|
||||||
|
cur = cur->children;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
(info != NULL) && (!info->isinline)) {
|
||||||
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||||
|
- (cur->parent != NULL) &&
|
||||||
|
- (cur->parent->name != NULL) &&
|
||||||
|
- (cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||||
|
+ (parent != NULL) &&
|
||||||
|
+ (parent->name != NULL) &&
|
||||||
|
+ (parent->name[0] != 'p')) /* p, pre, param */
|
||||||
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
break;
|
||||||
|
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
||||||
|
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
||||||
|
- ((cur->parent == NULL) ||
|
||||||
|
- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
|
||||||
|
- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
|
||||||
|
+ ((parent == NULL) ||
|
||||||
|
+ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
|
||||||
|
+ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
|
||||||
|
xmlChar *buffer;
|
||||||
|
|
||||||
|
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
||||||
|
@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * The parent should never be NULL here but we want to handle
|
||||||
|
- * corrupted documents gracefully.
|
||||||
|
- */
|
||||||
|
- if (cur->parent == NULL)
|
||||||
|
- return;
|
||||||
|
- cur = cur->parent;
|
||||||
|
+ cur = parent;
|
||||||
|
+ /* cur->parent was validated when descending. */
|
||||||
|
+ parent = cur->parent;
|
||||||
|
|
||||||
|
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
||||||
|
(cur->type == XML_DOCUMENT_NODE)) {
|
||||||
|
@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
|
(cur->next != NULL)) {
|
||||||
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||||
|
- (cur->parent != NULL) &&
|
||||||
|
- (cur->parent->name != NULL) &&
|
||||||
|
- (cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||||
|
+ (parent != NULL) &&
|
||||||
|
+ (parent->name != NULL) &&
|
||||||
|
+ (parent->name[0] != 'p')) /* p, pre, param */
|
||||||
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/xmlsave.c b/xmlsave.c
|
||||||
|
index 61a40459..aedbd5e7 100644
|
||||||
|
--- a/xmlsave.c
|
||||||
|
+++ b/xmlsave.c
|
||||||
|
@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
static void
|
||||||
|
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
int format = ctxt->format;
|
||||||
|
- xmlNodePtr tmp, root, unformattedNode = NULL;
|
||||||
|
+ xmlNodePtr tmp, root, unformattedNode = NULL, parent;
|
||||||
|
xmlAttrPtr attr;
|
||||||
|
xmlChar *start, *end;
|
||||||
|
xmlOutputBufferPtr buf;
|
||||||
|
@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
buf = ctxt->buf;
|
||||||
|
|
||||||
|
root = cur;
|
||||||
|
+ parent = cur->parent;
|
||||||
|
while (1) {
|
||||||
|
switch (cur->type) {
|
||||||
|
case XML_DOCUMENT_NODE:
|
||||||
|
@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XML_DOCUMENT_FRAG_NODE:
|
||||||
|
- if (cur->children != NULL) {
|
||||||
|
+ /* Always validate cur->parent when descending. */
|
||||||
|
+ if ((cur->parent == parent) && (cur->children != NULL)) {
|
||||||
|
+ parent = cur;
|
||||||
|
cur = cur->children;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XML_ELEMENT_NODE:
|
||||||
|
- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
|
||||||
|
+ /*
|
||||||
|
+ * Some users like lxml are known to pass nodes with a corrupted
|
||||||
|
+ * tree structure. Fall back to a recursive call to handle this
|
||||||
|
+ * case.
|
||||||
|
+ */
|
||||||
|
+ if ((cur->parent != parent) && (cur->children != NULL)) {
|
||||||
|
+ xmlNodeDumpOutputInternal(ctxt, cur);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if ((ctxt->level > 0) && (ctxt->format == 1) &&
|
||||||
|
+ (xmlIndentTreeOutput))
|
||||||
|
xmlOutputBufferWrite(buf, ctxt->indent_size *
|
||||||
|
(ctxt->level > ctxt->indent_nr ?
|
||||||
|
ctxt->indent_nr : ctxt->level),
|
||||||
|
@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
xmlOutputBufferWrite(buf, 1, ">");
|
||||||
|
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
|
||||||
|
if (ctxt->level >= 0) ctxt->level++;
|
||||||
|
+ parent = cur;
|
||||||
|
cur = cur->children;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * The parent should never be NULL here but we want to handle
|
||||||
|
- * corrupted documents gracefully.
|
||||||
|
- */
|
||||||
|
- if (cur->parent == NULL)
|
||||||
|
- return;
|
||||||
|
- cur = cur->parent;
|
||||||
|
+ cur = parent;
|
||||||
|
+ /* cur->parent was validated when descending. */
|
||||||
|
+ parent = cur->parent;
|
||||||
|
|
||||||
|
if (cur->type == XML_ELEMENT_NODE) {
|
||||||
|
if (ctxt->level > 0) ctxt->level--;
|
||||||
|
--
|
||||||
|
GitLab
|
||||||
|
|
@@ -1,6 +1,6 @@
|
|||||||
Name: libxml2
|
Name: libxml2
|
||||||
Version: 2.9.12
|
Version: 2.9.12
|
||||||
Release: 1%{?dist}
|
Release: 2%{?dist}
|
||||||
Summary: Library providing XML and HTML support
|
Summary: Library providing XML and HTML support
|
||||||
|
|
||||||
License: MIT
|
License: MIT
|
||||||
@@ -14,6 +14,8 @@ Patch0: libxml2-multilib.patch
|
|||||||
# Patch from openSUSE.
|
# Patch from openSUSE.
|
||||||
# See: https://bugzilla.gnome.org/show_bug.cgi?id=789714
|
# See: https://bugzilla.gnome.org/show_bug.cgi?id=789714
|
||||||
Patch1: libxml2-2.9.8-python3-unicode-errors.patch
|
Patch1: libxml2-2.9.8-python3-unicode-errors.patch
|
||||||
|
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
|
||||||
|
Patch2: libxml2-2.9.12-fix-lxml-corrupted-tree.patch
|
||||||
|
|
||||||
BuildRequires: cmake-rpm-macros
|
BuildRequires: cmake-rpm-macros
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
@@ -144,6 +146,9 @@ gzip -9 -c doc/libxml2-api.xml > doc/libxml2-api.xml.gz
|
|||||||
%{python3_sitearch}/libxml2mod.so
|
%{python3_sitearch}/libxml2mod.so
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed May 19 2021 David King <amigadave@amigadave.com> - 2.9.12-2
|
||||||
|
- Fix python-lxml regression with 2.9.12
|
||||||
|
|
||||||
* Thu May 13 2021 David King <amigadave@amigadave.com> - 2.9.12-1
|
* Thu May 13 2021 David King <amigadave@amigadave.com> - 2.9.12-1
|
||||||
- Update to 2.9.12 (#1960153)
|
- Update to 2.9.12 (#1960153)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user