117 lines
5.1 KiB
Diff
117 lines
5.1 KiB
Diff
|
From a500f721e3b34018f0a86af275427663dc337b5a Mon Sep 17 00:00:00 2001
|
||
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
||
|
Date: Wed, 12 Jul 2023 16:59:07 +0200
|
||
|
Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml,
|
||
|
depending on the availability of the RNG validation file. Some lxml
|
||
|
distributions discard the validation schema file due to licensing issues.
|
||
|
|
||
|
See https://bugs.launchpad.net/lxml/+bug/2024343
|
||
|
---
|
||
|
CHANGES.txt | 11 +++++++++++
|
||
|
doc/validation.txt | 9 +++++++++
|
||
|
src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++-----
|
||
|
3 files changed, 39 insertions(+), 5 deletions(-)
|
||
|
|
||
|
diff --git a/CHANGES.txt b/CHANGES.txt
|
||
|
index 24052db..e68ee9a 100644
|
||
|
--- a/CHANGES.txt
|
||
|
+++ b/CHANGES.txt
|
||
|
@@ -2,6 +2,17 @@
|
||
|
lxml changelog
|
||
|
==============
|
||
|
|
||
|
+4.9.3+
|
||
|
+======
|
||
|
+
|
||
|
+* LP#2024343: The validation of the schema file itself is now optional in the
|
||
|
+ ISO-Schematron implementation. This was done because some lxml distributions
|
||
|
+ discard the RNG validation schema file due to licensing issues. The validation
|
||
|
+ can now always be disabled with ``Schematron(..., validate_schema=False)``.
|
||
|
+ It is enabled by default if available and disabled otherwise. The module
|
||
|
+ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
|
||
|
+ to detect whether schema file validation is available.
|
||
|
+
|
||
|
4.9.3 (2023-07-05)
|
||
|
==================
|
||
|
|
||
|
diff --git a/doc/validation.txt b/doc/validation.txt
|
||
|
index af9d007..27c0ccd 100644
|
||
|
--- a/doc/validation.txt
|
||
|
+++ b/doc/validation.txt
|
||
|
@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be
|
||
|
a very powerful tool e.g. for establishing validation stages or to provide
|
||
|
different validators for different "validation audiences".
|
||
|
|
||
|
+Note: Some lxml distributions exclude the validation schema file due to licensing issues.
|
||
|
+Since lxml 4.9.2-8, the validation of the user-provided schema can be disabled with
|
||
|
+``Schematron(..., validate_schema=False)``.
|
||
|
+Schema validation is enabled by default if the validation schema file is available, and disabled otherwise. Previous versions of
|
||
|
+lxml always had it enabled and failed at import time if the file was not available.
|
||
|
+Thus, some distributions chose to remove the entire ISO-Schematron support.
|
||
|
+The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
|
||
|
+since lxml 4.9.2-8 to detect whether schema file validation is available.
|
||
|
+
|
||
|
(Pre-ISO-Schematron)
|
||
|
--------------------
|
||
|
|
||
|
diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py
|
||
|
index 5967b10..2846a66 100644
|
||
|
--- a/src/lxml/isoschematron/__init__.py
|
||
|
+++ b/src/lxml/isoschematron/__init__.py
|
||
|
@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
|
||
|
svrl_validation_errors = _etree.XPath(
|
||
|
'//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
|
||
|
|
||
|
-
|
||
|
# RelaxNG validator for schematron schemas
|
||
|
-schematron_schema_valid = _etree.RelaxNG(
|
||
|
- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
|
||
|
+schematron_schema_valid_supported = False
|
||
|
+try:
|
||
|
+ schematron_schema_valid = _etree.RelaxNG(
|
||
|
+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
|
||
|
+ schematron_schema_valid_supported = True
|
||
|
+except _etree.RelaxNGParseError:
|
||
|
+ # Some distributions delete the file due to licensing issues.
|
||
|
+ def schematron_schema_valid(arg):
|
||
|
+ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
|
||
|
|
||
|
|
||
|
def stylesheet_params(**kwargs):
|
||
|
@@ -153,6 +159,13 @@ class Schematron(_etree._Validator):
|
||
|
report document gets stored and can be accessed as the ``validation_report``
|
||
|
property.
|
||
|
|
||
|
+ If ``validate_schema`` is set to False, the validation of the schema file
|
||
|
+ itself is disabled. Validation happens by default after building the full
|
||
|
+ schema, unless the schema validation file cannot be found at import time,
|
||
|
+ in which case the validation gets disabled. Some lxml distributions exclude
|
||
|
+ this file due to licensing issues. ISO-Schematron validation can then still
|
||
|
+ be used normally, but the schemas themselves cannot be validated.
|
||
|
+
|
||
|
Here is a usage example::
|
||
|
|
||
|
>>> from lxml import etree
|
||
|
@@ -234,7 +247,8 @@ class Schematron(_etree._Validator):
|
||
|
def __init__(self, etree=None, file=None, include=True, expand=True,
|
||
|
include_params={}, expand_params={}, compile_params={},
|
||
|
store_schematron=False, store_xslt=False, store_report=False,
|
||
|
- phase=None, error_finder=ASSERTS_ONLY):
|
||
|
+ phase=None, error_finder=ASSERTS_ONLY,
|
||
|
+ validate_schema=schematron_schema_valid_supported):
|
||
|
super(Schematron, self).__init__()
|
||
|
|
||
|
self._store_report = store_report
|
||
|
@@ -273,7 +287,7 @@ class Schematron(_etree._Validator):
|
||
|
schematron = self._include(schematron, **include_params)
|
||
|
if expand:
|
||
|
schematron = self._expand(schematron, **expand_params)
|
||
|
- if not schematron_schema_valid(schematron):
|
||
|
+ if validate_schema and not schematron_schema_valid(schematron):
|
||
|
raise _etree.SchematronParseError(
|
||
|
"invalid schematron schema: %s" %
|
||
|
schematron_schema_valid.error_log)
|
||
|
--
|
||
|
2.40.1
|
||
|
|