diff --git a/.gitignore b/.gitignore index d2660f9..c133ee9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /lxml-4.9.1.tar.gz /lxml-4.9.2.tar.gz /lxml-4.9.2-no-isoschematron.tar.gz +/lxml-4.9.2-no-isoschematron-rng.tar.gz diff --git a/380.patch b/380.patch new file mode 100644 index 0000000..daac445 --- /dev/null +++ b/380.patch @@ -0,0 +1,24 @@ +From d18f2f22218ea0e0b5327b5a2bda789afdf16e41 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= +Date: Fri, 14 Jul 2023 12:18:25 +0200 +Subject: [PATCH] Skip test_isoschematron.test_schematron_invalid_schema_empty + without the RNG file + +The expected SchematronParseError only happens when validate_schema is true. +--- + src/lxml/tests/test_isoschematron.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py +index 6d2aa3fb6..900f257c3 100644 +--- a/src/lxml/tests/test_isoschematron.py ++++ b/src/lxml/tests/test_isoschematron.py +@@ -55,6 +55,8 @@ def test_schematron_empty_pattern(self): + schema = isoschematron.Schematron(schema) + self.assertTrue(schema) + ++ @unittest.skipIf(not isoschematron.schematron_schema_valid_supported, ++ 'SchematronParseError is risen only when validate_schema is true') + def test_schematron_invalid_schema_empty(self): + schema = self.parse('''\ + diff --git a/Make-the-validation-of-ISO-Schematron-files-optional.patch b/Make-the-validation-of-ISO-Schematron-files-optional.patch new file mode 100644 index 0000000..d07006e --- /dev/null +++ b/Make-the-validation-of-ISO-Schematron-files-optional.patch @@ -0,0 +1,116 @@ +From 4ac96ce046e9f58141bd66639ba8cb1fad9deefb Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Wed, 12 Jul 2023 16:59:07 +0200 +Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml, + depending on the availability of the RNG validation file. Some lxml + distributions discard the validation schema file due to licensing issues. + +See https://bugs.launchpad.net/lxml/+bug/2024343 +--- + CHANGES.txt | 11 +++++++++++ + doc/validation.txt | 9 +++++++++ + src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++----- + 3 files changed, 39 insertions(+), 5 deletions(-) + +diff --git a/CHANGES.txt b/CHANGES.txt +index c684ad5..40e32cd 100644 +--- a/CHANGES.txt ++++ b/CHANGES.txt +@@ -2,6 +2,17 @@ + lxml changelog + ============== + ++4.9.2+ ++====== ++ ++* LP#2024343: The validation of the schema file itself is now optional in the ++ ISO-Schematron implementation. This was done because some lxml distributions ++ discard the RNG validation schema file due to licensing issues. The validation ++ can now always be disabled with ``Schematron(..., validate_schema=False)``. ++ It is enabled by default if available and disabled otherwise. The module ++ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used ++ to detect whether schema file validation is available. ++ + 4.9.2 (2022-12-13) + ================== + +diff --git a/doc/validation.txt b/doc/validation.txt +index af9d007..27c0ccd 100644 +--- a/doc/validation.txt ++++ b/doc/validation.txt +@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be + a very powerful tool e.g. for establishing validation stages or to provide + different validators for different "validation audiences". + ++Note: Some lxml distributions exclude the validation schema file due to licensing issues. ++Since lxml 4.9.2-8, the validation of the user provided schema can be disabled with ++``Schematron(..., validate_schema=False)``. ++It is enabled by default if available and disabled otherwise. Previous versions of ++lxml always had it enabled and failed at import time if the file was not available. ++Thus, some distributions chose to remove the entire ISO-Schematron support. ++The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used ++since lxml 4.9.2-8 to detect whether schema file validation is available. ++ + (Pre-ISO-Schematron) + -------------------- + +diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py +index 5967b10..2846a66 100644 +--- a/src/lxml/isoschematron/__init__.py ++++ b/src/lxml/isoschematron/__init__.py +@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( + svrl_validation_errors = _etree.XPath( + '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) + +- + # RelaxNG validator for schematron schemas +-schematron_schema_valid = _etree.RelaxNG( +- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) ++schematron_schema_valid_supported = False ++try: ++ schematron_schema_valid = _etree.RelaxNG( ++ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) ++ schematron_schema_valid_supported = True ++except _etree.RelaxNGParseError: ++ # Some distributions delete the file due to licensing issues. ++ def schematron_schema_valid(arg): ++ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") + + + def stylesheet_params(**kwargs): +@@ -153,6 +159,13 @@ class Schematron(_etree._Validator): + report document gets stored and can be accessed as the ``validation_report`` + property. + ++ If ``validate_schema`` is set to False, the validation of the schema file ++ itself is disabled. Validation happens by default after building the full ++ schema, unless the schema validation file cannot be found at import time, ++ in which case the validation gets disabled. Some lxml distributions exclude ++ this file due to licensing issues. ISO-Schematron validation can then still ++ be used normally, but the schemas themselves cannot be validated. ++ + Here is a usage example:: + + >>> from lxml import etree +@@ -234,7 +247,8 @@ class Schematron(_etree._Validator): + def __init__(self, etree=None, file=None, include=True, expand=True, + include_params={}, expand_params={}, compile_params={}, + store_schematron=False, store_xslt=False, store_report=False, +- phase=None, error_finder=ASSERTS_ONLY): ++ phase=None, error_finder=ASSERTS_ONLY, ++ validate_schema=schematron_schema_valid_supported): + super(Schematron, self).__init__() + + self._store_report = store_report +@@ -273,7 +287,7 @@ class Schematron(_etree._Validator): + schematron = self._include(schematron, **include_params) + if expand: + schematron = self._expand(schematron, **expand_params) +- if not schematron_schema_valid(schematron): ++ if validate_schema and not schematron_schema_valid(schematron): + raise _etree.SchematronParseError( + "invalid schematron schema: %s" % + schematron_schema_valid.error_log) +-- +2.40.1 + diff --git a/get-lxml-source.sh b/get-lxml-source.sh index 9d79f3a..ae069f1 100755 --- a/get-lxml-source.sh +++ b/get-lxml-source.sh @@ -13,17 +13,16 @@ fi versionedname=lxml-${version} orig_archive=${versionedname}.tar.gz -new_archive=${versionedname}-no-isoschematron.tar.gz +new_archive=${versionedname}-no-isoschematron-rng.tar.gz if [ ! -e ${orig_archive} ]; then wget -N https://files.pythonhosted.org/packages/source/l/lxml/${orig_archive} fi -deleted_module=lxml-${version}/src/lxml/isoschematron/ -deleted_test=lxml-${version}/src/lxml/tests/test_isoschematron.py +deleted_directory=lxml-${version}/src/lxml/isoschematron/resources/rng # tar --delete does not operate on compressed archives, so do # gz decompression explicitly gzip --decompress ${orig_archive} -tar -v --delete -f ${orig_archive//.gz} {$deleted_module,$deleted_test} +tar -v --delete -f ${orig_archive//.gz} ${deleted_directory} gzip -cf ${orig_archive//.gz} > ${new_archive} diff --git a/python3.11-lxml.spec b/python3.11-lxml.spec index e3d2ccc..a115faf 100644 --- a/python3.11-lxml.spec +++ b/python3.11-lxml.spec @@ -3,7 +3,7 @@ Name: python%{python3_pkgversion}-lxml Version: 4.9.2 -Release: 3%{?dist} +Release: 4%{?dist} Summary: XML processing library combining libxml2/libxslt with the ElementTree API # The lxml project is licensed under BSD-3-Clause @@ -12,13 +12,21 @@ Summary: XML processing library combining libxml2/libxslt with the Elemen # .xsl schematron files are under the MIT license License: BSD and MIT URL: https://github.com/lxml/lxml + # We use the get-lxml-source.sh script to generate the tarball -# without the isoschematron submodule as it contains a problematic -# license. +# without the isoschematron RNG validation file under a problematic license. # See: https://gitlab.com/fedora/legal/fedora-license-data/-/issues/154 -Source0: lxml-%{version}-no-isoschematron.tar.gz +Source0: lxml-%{version}-no-isoschematron-rng.tar.gz Source1: get-lxml-source.sh +# Make the validation of ISO-Schematron files optional in lxml, +# depending on the availability of the RNG validation file +# Rebased from https://github.com/lxml/lxml/commit/4bfab2c821961fb4c5ed8a04e329778c9b09a1df +# Will be included in lxml 5.0 +Patch: Make-the-validation-of-ISO-Schematron-files-optional.patch +# Skip test_isoschematron.test_schematron_invalid_schema_empty without the RNG file +Patch: https://github.com/lxml/lxml/pull/380.patch + BuildRequires: gcc BuildRequires: libxml2-devel BuildRequires: libxslt-devel @@ -39,12 +47,6 @@ XML Schema, XSLT, C14N and much more. %prep %autosetup -n lxml-%{version} -p1 -# Remove isoschematron module due to problematic license -sed -i "s/, 'lxml.isoschematron'//" setup.py -# Remove the doctests for it (the documentation is not shipped) -# The command [d]eletes all lines from the first pattern to the second -sed -Ei '/^Schematron$/,/^\(Pre-ISO-Schematron\)$/d' doc/validation.txt - # Remove pregenerated Cython C sources # We need to do this after %%pyproject_buildrequires because setup.py errors # without Cython and without the .c files. @@ -70,6 +72,11 @@ cp -a build/lib.%{python3_platform}-*/* src/ %{python3_sitearch}/lxml-*.egg-info/ %changelog +* Tue Aug 15 2023 Tomas Orsava - 4.9.2-4 +- Bring back the isoschematron submodule, + but without the validation of the schema file itself +- Resolves: RHEL-5570 + * Thu Feb 16 2023 Charalampos Stratakis - 4.9.2-3 - Remove the isoschematron submodule diff --git a/sources b/sources index 2599e86..dcb73a3 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (lxml-4.9.2-no-isoschematron.tar.gz) = 3ee80967dfa69c840a27ad99546727346f8bb058ed64bf93d2aa8959fb3000e78c1227f4b8709d25aaadeb9a4ccdf97a03eb53a1171de8feda93fbeaa15a8b39 +SHA512 (lxml-4.9.2-no-isoschematron-rng.tar.gz) = 7de5a626bb87f2f56f90a3c308199ed79d78796eeb075b5d42df4bccf3606d3fb2e2981fa9206412b675304dd487fce2dfb90b3b3e0793e8d63be2216b325c47