From 4ac96ce046e9f58141bd66639ba8cb1fad9deefb Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 12 Jul 2023 16:59:07 +0200 Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml, depending on the availability of the RNG validation file. Some lxml distributions discard the validation schema file due to licensing issues. See https://bugs.launchpad.net/lxml/+bug/2024343 --- CHANGES.txt | 11 +++++++++++ doc/validation.txt | 9 +++++++++ src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++----- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index c684ad5..40e32cd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,17 @@ lxml changelog ============== +4.9.2+ +====== + +* LP#2024343: The validation of the schema file itself is now optional in the + ISO-Schematron implementation. This was done because some lxml distributions + discard the RNG validation schema file due to licensing issues. The validation + can now always be disabled with ``Schematron(..., validate_schema=False)``. + It is enabled by default if available and disabled otherwise. The module + constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used + to detect whether schema file validation is available. + 4.9.2 (2022-12-13) ================== diff --git a/doc/validation.txt b/doc/validation.txt index af9d007..27c0ccd 100644 --- a/doc/validation.txt +++ b/doc/validation.txt @@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be a very powerful tool e.g. for establishing validation stages or to provide different validators for different "validation audiences". +Note: Some lxml distributions exclude the validation schema file due to licensing issues. +Since lxml 4.9.2-8, the validation of the user-provided schema can be disabled with +``Schematron(..., validate_schema=False)``. +It is enabled by default if the validation schema file is available and disabled otherwise. 
Previous versions of +lxml always had it enabled and failed at import time if the file was not available. +Thus, some distributions chose to remove the entire ISO-Schematron support. +The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used +since lxml 4.9.2-8 to detect whether schema file validation is available. + (Pre-ISO-Schematron) -------------------- diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py index 5967b10..2846a66 100644 --- a/src/lxml/isoschematron/__init__.py +++ b/src/lxml/isoschematron/__init__.py @@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( svrl_validation_errors = _etree.XPath( '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) - # RelaxNG validator for schematron schemas -schematron_schema_valid = _etree.RelaxNG( - file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) +schematron_schema_valid_supported = False +try: + schematron_schema_valid = _etree.RelaxNG( + file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) + schematron_schema_valid_supported = True +except _etree.RelaxNGParseError: + # Some distributions delete the file due to licensing issues. + def schematron_schema_valid(arg): + raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") def stylesheet_params(**kwargs): @@ -153,6 +159,13 @@ class Schematron(_etree._Validator): report document gets stored and can be accessed as the ``validation_report`` property. + If ``validate_schema`` is set to False, the validation of the schema file + itself is disabled. Validation happens by default after building the full + schema, unless the schema validation file cannot be found at import time, + in which case the validation gets disabled. Some lxml distributions exclude + this file due to licensing issues. ISO-Schematron validation can then still + be used normally, but the schemas themselves cannot be validated. 
+ Here is a usage example:: >>> from lxml import etree @@ -234,7 +247,8 @@ class Schematron(_etree._Validator): def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, store_schematron=False, store_xslt=False, store_report=False, - phase=None, error_finder=ASSERTS_ONLY): + phase=None, error_finder=ASSERTS_ONLY, + validate_schema=schematron_schema_valid_supported): super(Schematron, self).__init__() self._store_report = store_report @@ -273,7 +287,7 @@ class Schematron(_etree._Validator): schematron = self._include(schematron, **include_params) if expand: schematron = self._expand(schematron, **expand_params) - if not schematron_schema_valid(schematron): + if validate_schema and not schematron_schema_valid(schematron): raise _etree.SchematronParseError( "invalid schematron schema: %s" % schematron_schema_valid.error_log) -- 2.40.1