diff --git a/.gitignore b/.gitignore index dfc2d49..5a47e6b 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,5 @@ lxml-2.2.7.tar.gz.asc /lxml-4.9.3-no-isoschematron-rng.tar.gz /lxml-4.9.4-no-isoschematron-rng.tar.gz /lxml-5.1.0-no-isoschematron-rng.tar.gz +/lxml-5.2.0-no-isoschematron-rng.tar.gz +/lxml-5.2.1-no-isoschematron-rng.tar.gz diff --git a/407.patch b/407.patch deleted file mode 100644 index 81e3203..0000000 --- a/407.patch +++ /dev/null @@ -1,98 +0,0 @@ -From e3012a702dea2b03830fe00a5e8f7a429bbc3f42 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= -Date: Tue, 27 Feb 2024 18:43:45 +0100 -Subject: [PATCH] Fix test_elementtree with Expat 2.6.0 - -Feeding the parser by too small chunks defers parsing to prevent -CVE-2023-52425. Future versions of Expat may be more reactive. - -Heavily inspired by https://github.com/python/cpython/commit/4a08e7b3431cd32a0daf22a33421cd3035343dc4 - -We cannot use a @fails_with_expat_2_6_0 decorator -because the test passes in ETreePullTestCase. - -Co-Authored-By: Serhiy Storchaka ---- - src/lxml/tests/test_elementtree.py | 62 +++++++++++++++++++----------- - 1 file changed, 39 insertions(+), 23 deletions(-) - -diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py -index 8ccf4442a..ef923c5ce 100644 ---- a/src/lxml/tests/test_elementtree.py -+++ b/src/lxml/tests/test_elementtree.py -@@ -10,6 +10,7 @@ - import io - import operator - import os -+import pyexpat - import re - import sys - import textwrap -@@ -4383,29 +4384,44 @@ def assert_event_tags(self, parser, expected, max_events=None): - self.assertEqual([(action, elem.tag) for action, elem in events], - expected) - -- def test_simple_xml(self): -- for chunk_size in (None, 1, 5): -- #with self.subTest(chunk_size=chunk_size): -- parser = self.etree.XMLPullParser() -- self.assert_event_tags(parser, []) -- self._feed(parser, "\n", chunk_size) -- self.assert_event_tags(parser, []) -- self._feed(parser, -- "\n text\n", chunk_size) -- self.assert_event_tags(parser, [('end', 'element')]) -- self._feed(parser, "texttail\n", chunk_size) -- self._feed(parser, "\n", chunk_size) -- self.assert_event_tags(parser, [ -- ('end', 'element'), -- ('end', 'empty-element'), -- ]) -- self._feed(parser, "\n", chunk_size) -- self.assert_event_tags(parser, [('end', 'root')]) -- root = self._close_and_return_root(parser) -- self.assertEqual(root.tag, 'root') -+ def test_simple_xml(self, chunk_size=None): -+ parser = self.etree.XMLPullParser() -+ self.assert_event_tags(parser, []) -+ self._feed(parser, "\n", chunk_size) -+ self.assert_event_tags(parser, []) -+ self._feed(parser, -+ "\n text\n", chunk_size) -+ self.assert_event_tags(parser, [('end', 'element')]) -+ self._feed(parser, "texttail\n", chunk_size) -+ self._feed(parser, "\n", chunk_size) -+ self.assert_event_tags(parser, [ -+ ('end', 'element'), -+ ('end', 'empty-element'), -+ ]) -+ self._feed(parser, "\n", chunk_size) -+ self.assert_event_tags(parser, [('end', 'root')]) -+ root = self._close_and_return_root(parser) -+ self.assertEqual(root.tag, 'root') -+ -+ def test_simple_xml_chunk_1(self): -+ if self.etree is not etree and pyexpat.version_info >= (2, 6, 0): -+ raise unittest.SkipTest( -+ "Feeding the parser by too small chunks defers parsing" -+ ) -+ self.test_simple_xml(chunk_size=1) -+ -+ def test_simple_xml_chunk_5(self): -+ if self.etree is not etree and pyexpat.version_info >= (2, 6, 0): -+ raise unittest.SkipTest( -+ "Feeding the parser by too small chunks defers parsing" -+ ) -+ self.test_simple_xml(chunk_size=5) -+ -+ def test_simple_xml_chunk_22(self): -+ self.test_simple_xml(chunk_size=22) - - def test_feed_while_iterating(self): - parser = self.etree.XMLPullParser() diff --git a/417.patch b/417.patch new file mode 100644 index 0000000..c748c0a --- /dev/null +++ b/417.patch @@ -0,0 +1,40 @@ +From f5f64df808b35969794ba3ea8c19079276aa0cb0 Mon Sep 17 00:00:00 2001 +From: Lumir Balhar +Date: Thu, 4 Apr 2024 11:11:38 +0200 +Subject: [PATCH] Skip test_feedparser_data if lxml_html_clean is not available + +This is useful mostly for distributors shipping lxml without +lxml_html_clean. +--- + src/lxml/html/tests/test_feedparser_data.py | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/src/lxml/html/tests/test_feedparser_data.py b/src/lxml/html/tests/test_feedparser_data.py +index 36fba1acb..264c0d4b5 100644 +--- a/src/lxml/html/tests/test_feedparser_data.py ++++ b/src/lxml/html/tests/test_feedparser_data.py +@@ -9,7 +9,11 @@ + from lxml.tests.common_imports import doctest + from lxml.doctestcompare import LHTMLOutputChecker + +-from lxml.html.clean import clean, Cleaner ++try: ++ from lxml.html.clean import clean, Cleaner ++ html_clean_available = True ++except ImportError: ++ html_clean_available = False + + feed_dirs = [ + os.path.join(os.path.dirname(__file__), 'feedparser-data'), +@@ -80,6 +84,11 @@ def shortDescription(self): + + def test_suite(): + suite = unittest.TestSuite() ++ ++ if not html_clean_available: ++ print("Skipping tests in feedparser_data - external lxml_html_clean package is not installed") ++ return suite ++ + for dir in feed_dirs: + for fn in os.listdir(dir): + fn = os.path.join(dir, fn) diff --git a/python-lxml.spec b/python-lxml.spec index 8237b01..53711ae 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -1,5 +1,5 @@ Name: python-lxml -Version: 5.1.0 +Version: 5.2.1 Release: %autorelease Summary: XML processing library combining libxml2/libxslt with the ElementTree API @@ -16,9 +16,8 @@ URL: https://github.com/lxml/lxml Source0: lxml-%{version}-no-isoschematron-rng.tar.gz Source1: get-lxml-source.sh -# Fix test_elementtree with Expat 2.6.0 -# Merged upstream -Patch: https://github.com/lxml/lxml/pull/407.patch +# Skip some tests if lxml_html_clean is not available +Patch: https://github.com/lxml/lxml/pull/417.patch BuildRequires: gcc BuildRequires: libxml2-devel @@ -29,6 +28,7 @@ BuildRequires: python3-devel # - [cssselect] Requires cssselect BuildRequires lxml # - [html5] Requires html5lib BuildRequires lxml # - [htmlsoup] Requires beautifulsoup4 Requires lxml +# - [html_clean] Requires lxml-html-clean Requires lxml # Hence we provide a bcond to disable the extras altogether. # By default, the extras are disabled in RHEL, to avoid dependencies. %bcond extras %{undefined rhel} @@ -47,6 +47,7 @@ Summary: %{summary} Suggests: python3-lxml+cssselect Suggests: python3-lxml+html5 Suggests: python3-lxml+htmlsoup +Suggests: python3-lxml+html_clean %endif %description -n python3-lxml %{_description} @@ -54,7 +55,7 @@ Suggests: python3-lxml+htmlsoup Python 3 version. %if %{with extras} -%pyproject_extras_subpkg -n python3-lxml cssselect html5 htmlsoup +%pyproject_extras_subpkg -n python3-lxml cssselect html5 htmlsoup html_clean %endif %prep @@ -67,7 +68,7 @@ sed -i "s/Cython.*/Cython/" requirements.txt sed -i 's/"Cython.*",/"Cython",/' pyproject.toml %generate_buildrequires -%pyproject_buildrequires -x source%{?with_extras:,cssselect,html5,htmlsoup} +%pyproject_buildrequires -x source%{?with_extras:,cssselect,html5,htmlsoup,html_clean} %build # Remove pregenerated Cython C sources diff --git a/sources b/sources index c2b3845..2274f55 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (lxml-5.1.0-no-isoschematron-rng.tar.gz) = f4b65c0189c89742fb4be6a3e73b08e7e7338272b71482e64be75dc8d53cebc769c0520a86a46579328fa0ec4377bb2bd860338550b1098d26c8f509fcedc664 +SHA512 (lxml-5.2.1-no-isoschematron-rng.tar.gz) = 7bfd9caf7e45157726e16c7286bdcb1b4dfa24df000df96d6571d65f59760ea5747e364da788cee3116977810b870ab90e451b519701f5cc26f1111a2528c996