524 lines
20 KiB
Diff
524 lines
20 KiB
Diff
From 3191e76a3e6d0b4a89bbf9ec52bbc84aa24e22b8 Mon Sep 17 00:00:00 2001
|
|
From: Waylan Limberg <waylan.limberg@icloud.com>
|
|
Date: Wed, 18 Jun 2025 10:29:03 -0400
|
|
Subject: [PATCH 1/4] Ensure incomplete markup declaration in raw HTML doesn't
|
|
crash parser.
|
|
|
|
See Python bug report at gh-77057 for details. Until we drop support for
|
|
Python < 3.13 (where this was fixed upstream), we need to avoid the
|
|
unwanted error by checking for it explicitly. Fixes #1534.
|
|
---
|
|
markdown/extensions/md_in_html.py | 4 ++++
|
|
markdown/htmlparser.py | 4 ++++
|
|
tests/test_syntax/blocks/test_html_blocks.py | 7 +++++++
|
|
3 files changed, 15 insertions(+)
|
|
|
|
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
|
|
index 982d603..99001ca 100644
|
|
--- a/markdown/extensions/md_in_html.py
|
|
+++ b/markdown/extensions/md_in_html.py
|
|
@@ -227,6 +227,10 @@ class HTMLExtractorExtra(HTMLExtractor):
|
|
|
|
def parse_html_declaration(self, i):
|
|
if self.at_line_start() or self.intail or self.mdstack:
|
|
+ if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
|
|
+ # We have encountered the bug in #1534 (Python bug `gh-77057`).
|
|
+ # Provide an override until we drop support for Python < 3.13.
|
|
+ return self.parse_bogus_comment(i)
|
|
# The same override exists in `HTMLExtractor` without the check
|
|
# for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
|
|
return super(HTMLExtractor, self).parse_html_declaration(i)
|
|
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
|
|
index 29e2300..b9b9c6f 100644
|
|
--- a/markdown/htmlparser.py
|
|
+++ b/markdown/htmlparser.py
|
|
@@ -267,6 +267,10 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
|
|
def parse_html_declaration(self, i: int) -> int:
|
|
if self.at_line_start() or self.intail:
|
|
+ if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
|
|
+ # We have encountered the bug in #1534 (Python bug `gh-77057`).
|
|
+ # Provide an override until we drop support for Python < 3.13.
|
|
+ return self.parse_bogus_comment(i)
|
|
return super().parse_html_declaration(i)
|
|
# This is not the beginning of a raw block so treat as plain data
|
|
# and avoid consuming any tags which may follow (see #1066).
|
|
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
|
|
index 22b9498..e8c37b3 100644
|
|
--- a/tests/test_syntax/blocks/test_html_blocks.py
|
|
+++ b/tests/test_syntax/blocks/test_html_blocks.py
|
|
@@ -1275,6 +1275,13 @@ class TestHTMLBlocks(TestCase):
|
|
)
|
|
)
|
|
|
|
+ def test_not_actually_cdata(self):
|
|
+ # Ensure bug reported in #1534 is avoided.
|
|
+ self.assertMarkdownRenders(
|
|
+ '<![',
|
|
+ '<p><![</p>'
|
|
+ )
|
|
+
|
|
def test_raw_cdata_code_span(self):
|
|
self.assertMarkdownRenders(
|
|
self.dedent(
|
|
--
|
|
2.54.0
|
|
|
|
|
|
From 1a38a252e759b6a7d86710640af2b7157feff51c Mon Sep 17 00:00:00 2001
|
|
From: Isaac Muse <faceless.shop@gmail.com>
|
|
Date: Thu, 19 Jun 2025 09:46:13 -0600
|
|
Subject: [PATCH 2/4] Fixes for Python 3.14
|
|
|
|
- Fix codecs deprecation
|
|
- Fix issue with unclosed `<![`
|
|
- Fix issue with unclosed HTML tag `<foo`
|
|
- Fix issue with unclosed comments
|
|
- Add tests that run on the Python 3.14 beta, which should automatically update after release
|
|
|
|
Fixes #1537
|
|
---
|
|
markdown/__main__.py | 3 +--
|
|
markdown/core.py | 2 +-
|
|
markdown/extensions/md_in_html.py | 6 +++++-
|
|
markdown/htmlparser.py | 24 ++++++++++++++++++++++--
|
|
4 files changed, 29 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/markdown/__main__.py b/markdown/__main__.py
|
|
index c323aaa..259df63 100644
|
|
--- a/markdown/__main__.py
|
|
+++ b/markdown/__main__.py
|
|
@@ -21,7 +21,6 @@ from __future__ import annotations
|
|
|
|
import sys
|
|
import optparse
|
|
-import codecs
|
|
import warnings
|
|
import markdown
|
|
try:
|
|
@@ -100,7 +99,7 @@ def parse_options(args=None, values=None):
|
|
|
|
extension_configs = {}
|
|
if options.configfile:
|
|
- with codecs.open(
|
|
+ with open(
|
|
options.configfile, mode="r", encoding=options.encoding
|
|
) as fp:
|
|
try:
|
|
diff --git a/markdown/core.py b/markdown/core.py
|
|
index 6b556b4..e091b21 100644
|
|
--- a/markdown/core.py
|
|
+++ b/markdown/core.py
|
|
@@ -417,7 +417,7 @@ class Markdown:
|
|
# Read the source
|
|
if input:
|
|
if isinstance(input, str):
|
|
- input_file = codecs.open(input, mode="r", encoding=encoding)
|
|
+ input_file = open(input, mode="r", encoding=encoding)
|
|
else:
|
|
input_file = codecs.getreader(encoding)(input)
|
|
text = input_file.read()
|
|
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
|
|
index 99001ca..bf2a2fa 100644
|
|
--- a/markdown/extensions/md_in_html.py
|
|
+++ b/markdown/extensions/md_in_html.py
|
|
@@ -230,7 +230,11 @@ class HTMLExtractorExtra(HTMLExtractor):
|
|
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
|
|
# We have encountered the bug in #1534 (Python bug `gh-77057`).
|
|
# Provide an override until we drop support for Python < 3.13.
|
|
- return self.parse_bogus_comment(i)
|
|
+ result = self.parse_bogus_comment(i)
|
|
+ if result == -1:
|
|
+ self.handle_data(self.rawdata[i:i + 1])
|
|
+ return i + 1
|
|
+ return result
|
|
# The same override exists in `HTMLExtractor` without the check
|
|
# for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
|
|
return super(HTMLExtractor, self).parse_html_declaration(i)
|
|
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
|
|
index b9b9c6f..a4dd42d 100644
|
|
--- a/markdown/htmlparser.py
|
|
+++ b/markdown/htmlparser.py
|
|
@@ -85,6 +85,8 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
|
|
self.lineno_start_cache = [0]
|
|
|
|
+ self.override_comment_update = False
|
|
+
|
|
# This calls self.reset
|
|
super().__init__(*args, **kwargs)
|
|
self.md = md
|
|
@@ -245,8 +247,21 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
self.handle_empty_tag('&{};'.format(name), is_block=False)
|
|
|
|
def handle_comment(self, data: str):
|
|
+ # Check if the comment is unclosed, if so, we need to override position
|
|
+ i = self.line_offset + self.offset + len(data) + 4
|
|
+ if self.rawdata[i:i + 3] != '-->':
|
|
+ self.handle_data('<')
|
|
+ self.override_comment_update = True
|
|
+ return
|
|
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
|
|
|
|
+ def updatepos(self, i: int, j: int) -> int:
|
|
+ if self.override_comment_update:
|
|
+ self.override_comment_update = False
|
|
+ i = 0
|
|
+ j = 1
|
|
+ return super().updatepos(i, j)
|
|
+
|
|
def handle_decl(self, data: str):
|
|
self.handle_empty_tag('<!{}>'.format(data), is_block=True)
|
|
|
|
@@ -270,7 +285,11 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
|
|
# We have encountered the bug in #1534 (Python bug `gh-77057`).
|
|
# Provide an override until we drop support for Python < 3.13.
|
|
- return self.parse_bogus_comment(i)
|
|
+ result = self.parse_bogus_comment(i)
|
|
+ if result == -1:
|
|
+ self.handle_data(self.rawdata[i:i + 1])
|
|
+ return i + 1
|
|
+ return result
|
|
return super().parse_html_declaration(i)
|
|
# This is not the beginning of a raw block so treat as plain data
|
|
# and avoid consuming any tags which may follow (see #1066).
|
|
@@ -291,7 +310,8 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
self.__starttag_text = None
|
|
endpos = self.check_for_whole_start_tag(i)
|
|
if endpos < 0:
|
|
- return endpos
|
|
+ self.handle_data(self.rawdata[i:i + 1])
|
|
+ return i + 1
|
|
rawdata = self.rawdata
|
|
self.__starttag_text = rawdata[i:endpos]
|
|
|
|
--
|
|
2.54.0
|
|
|
|
|
|
From d4cfcb5c2b45634199b636bfac79600a1786c552 Mon Sep 17 00:00:00 2001
|
|
From: David King <dking@redhat.com>
|
|
Date: Tue, 28 Apr 2026 16:57:49 +0100
|
|
Subject: [PATCH 3/4] Backport upstream 3.10.1 to 3.10.2 fixes
|
|
|
|
---
|
|
markdown/htmlparser.py | 74 ++++++++++++---
|
|
markdown/inlinepatterns.py | 8 +-
|
|
tests/test_syntax/blocks/test_html_blocks.py | 97 ++++++++++++++++++--
|
|
3 files changed, 155 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
|
|
index a4dd42d..08f5a32 100644
|
|
--- a/markdown/htmlparser.py
|
|
+++ b/markdown/htmlparser.py
|
|
@@ -30,6 +30,9 @@ import importlib.util
|
|
import sys
|
|
|
|
|
|
+# Included for Python versions which do not have the current comment fix
|
|
+commentclose = re.compile(r'--!?>')
|
|
+
|
|
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
|
|
# Users can still do `from html import parser` and get the default behavior.
|
|
spec = importlib.util.find_spec('html.parser')
|
|
@@ -37,6 +40,12 @@ htmlparser = importlib.util.module_from_spec(spec)
|
|
spec.loader.exec_module(htmlparser)
|
|
sys.modules['htmlparser'] = htmlparser
|
|
|
|
+# This is a hack. We are sneaking in `</>` so we can capture it without the HTML parser
|
|
+# throwing it away. When we see it, we will process it as data.
|
|
+htmlparser.starttagopen = re.compile('<[a-zA-Z]|</>')
|
|
+
|
|
+htmlparser.endtagopen = re.compile('</[a-zA-Z]?')
|
|
+
|
|
# Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
|
|
htmlparser.piclose = re.compile(r'\?>')
|
|
# Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
|
|
@@ -67,6 +76,30 @@ htmlparser.locatestarttagend_tolerant = re.compile(r"""
|
|
blank_line_re = re.compile(r'^([ ]*\n){2}')
|
|
|
|
|
|
+class _HTMLParser(htmlparser.HTMLParser):
|
|
+ """Handle special start and end tags."""
|
|
+
|
|
+ def parse_endtag(self, i):
|
|
+ start = self.rawdata[i:i+3]
|
|
+ c = ord(start[-1])
|
|
+ if len(start) < 3 or not (65 <= c <= 90 or 97 <= c <= 122):
|
|
+ self.handle_data(self.rawdata[i:i + 2])
|
|
+ return i + 2
|
|
+ return super().parse_endtag(i)
|
|
+
|
|
+ def parse_starttag(self, i): # pragma: no cover
|
|
+ # Treat `</>` as normal data as it is not a real tag.
|
|
+ if self.rawdata[i:i + 3] == '</>':
|
|
+ self.handle_data(self.rawdata[i:i + 3])
|
|
+ return i + 3
|
|
+
|
|
+ return super().parse_starttag(i)
|
|
+
|
|
+
|
|
+# Overwrite `HTMLParser` with our custom one for projects like MkDocs that pull it in
|
|
+htmlparser.HTMLParser = _HTMLParser
|
|
+
|
|
+
|
|
class HTMLExtractor(htmlparser.HTMLParser):
|
|
"""
|
|
Extract raw HTML from text.
|
|
@@ -85,8 +118,6 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
|
|
self.lineno_start_cache = [0]
|
|
|
|
- self.override_comment_update = False
|
|
-
|
|
# This calls self.reset
|
|
super().__init__(*args, **kwargs)
|
|
self.md = md
|
|
@@ -247,21 +278,8 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
self.handle_empty_tag('&{};'.format(name), is_block=False)
|
|
|
|
def handle_comment(self, data: str):
|
|
- # Check if the comment is unclosed, if so, we need to override position
|
|
- i = self.line_offset + self.offset + len(data) + 4
|
|
- if self.rawdata[i:i + 3] != '-->':
|
|
- self.handle_data('<')
|
|
- self.override_comment_update = True
|
|
- return
|
|
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
|
|
|
|
- def updatepos(self, i: int, j: int) -> int:
|
|
- if self.override_comment_update:
|
|
- self.override_comment_update = False
|
|
- i = 0
|
|
- j = 1
|
|
- return super().updatepos(i, j)
|
|
-
|
|
def handle_decl(self, data: str):
|
|
self.handle_empty_tag('<!{}>'.format(data), is_block=True)
|
|
|
|
@@ -280,6 +298,18 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
self.handle_data('<?')
|
|
return i + 2
|
|
|
|
+ def parse_comment(self, i, report=True):
|
|
+ rawdata = self.rawdata
|
|
+ assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
|
|
+ match = commentclose.search(rawdata, i+4)
|
|
+ if not match:
|
|
+ self.handle_data('<')
|
|
+ return i + 1
|
|
+ if report:
|
|
+ j = match.start()
|
|
+ self.handle_comment(rawdata[i+4: j])
|
|
+ return match.end()
|
|
+
|
|
def parse_html_declaration(self, i: int) -> int:
|
|
if self.at_line_start() or self.intail:
|
|
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
|
|
@@ -296,6 +326,15 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
self.handle_data('<!')
|
|
return i + 2
|
|
|
|
+ def parse_bogus_comment(self, i, report=0):
|
|
+ # Override the default behavior so that bogus comments get passed
|
|
+ # through unaltered by setting `report` to `0` (see #1425).
|
|
+ pos = super().parse_bogus_comment(i, report)
|
|
+ if pos == -1: # pragma: no cover
|
|
+ return -1
|
|
+ self.handle_empty_tag(self.rawdata[i:pos], is_block=False)
|
|
+ return pos
|
|
+
|
|
# The rest has been copied from base class in standard lib to address #1036.
|
|
# As `__startag_text` is private, all references to it must be in this subclass.
|
|
# The last few lines of `parse_starttag` are reversed so that `handle_starttag`
|
|
@@ -307,6 +346,11 @@ class HTMLExtractor(htmlparser.HTMLParser):
|
|
return self.__starttag_text
|
|
|
|
def parse_starttag(self, i: int) -> int: # pragma: no cover
|
|
+ # Treat `</>` as normal data as it is not a real tag.
|
|
+ if self.rawdata[i:i + 3] == '</>':
|
|
+ self.handle_data(self.rawdata[i:i + 3])
|
|
+ return i + 3
|
|
+
|
|
self.__starttag_text = None
|
|
endpos = self.check_for_whole_start_tag(i)
|
|
if endpos < 0:
|
|
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
|
|
index 296ab83..78310fc 100644
|
|
--- a/markdown/inlinepatterns.py
|
|
+++ b/markdown/inlinepatterns.py
|
|
@@ -161,7 +161,13 @@ AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
|
|
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'
|
|
""" Match an automatic email link (`<me@example.com>`). """
|
|
|
|
-HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)'
|
|
+HTML_RE = (
|
|
+ r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|'
|
|
+ r'!--(?:(?!<!--|-->).)*--|'
|
|
+ r'[?](?:(?!<[?]|[?]>).)*[?]|'
|
|
+ r'!\[CDATA\[(?:(?!<!\[CDATA\[|\]\]>).)*\]\]'
|
|
+ ')>)'
|
|
+)
|
|
""" Match an HTML tag (`<...>`). """
|
|
|
|
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'
|
|
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
|
|
index e8c37b3..3b95ec7 100644
|
|
--- a/tests/test_syntax/blocks/test_html_blocks.py
|
|
+++ b/tests/test_syntax/blocks/test_html_blocks.py
|
|
@@ -782,16 +782,10 @@ class TestHTMLBlocks(TestCase):
|
|
'<!-- *foo* -->'
|
|
)
|
|
|
|
- # Note: this is a change in behavior for Python-Markdown, which does *not* match the reference
|
|
- # implementation. However, it does match the HTML5 spec. Declarations must start with either
|
|
- # `<!DOCTYPE` or `<![`. Anything else that starts with `<!` is a comment. According to the
|
|
- # HTML5 spec, a comment without the hyphens is a "bogus comment", but a comment nonetheless.
|
|
- # See https://www.w3.org/TR/html52/syntax.html#markup-declaration-open-state.
|
|
- # If we wanted to change this behavior, we could override `HTMLParser.parse_bogus_comment()`.
|
|
def test_bogus_comment(self):
|
|
self.assertMarkdownRenders(
|
|
- '<!*foo*>',
|
|
- '<!--*foo*-->'
|
|
+ '<!invalid>',
|
|
+ '<p><!invalid></p>'
|
|
)
|
|
|
|
def test_raw_multiline_comment(self):
|
|
@@ -1624,3 +1618,90 @@ class TestHTMLBlocks(TestCase):
|
|
placeholder = md.htmlStash.get_placeholder(md.htmlStash.html_counter + 1)
|
|
result = md.postprocessors['raw_html'].run(placeholder)
|
|
self.assertEqual(placeholder, result)
|
|
+
|
|
+ def test_bogus_comment_endtag(self):
|
|
+ self.assertMarkdownRenders(
|
|
+ '</#invalid>',
|
|
+ '<p></#invalid></p>'
|
|
+ )
|
|
+
|
|
+ def test_issue_1590(self):
|
|
+ """Test case with comments in table for issue #1590."""
|
|
+
|
|
+ self.assertMarkdownRenders(
|
|
+ self.dedent(
|
|
+ '''
|
|
+ <table>
|
|
+ <!--[if mso]>-->
|
|
+ <td>foo</td>
|
|
+ <!--<!endif]-->
|
|
+ <td>bar</td>
|
|
+ </table>
|
|
+ '''
|
|
+ ),
|
|
+ self.dedent(
|
|
+ '''
|
|
+ <table>
|
|
+ <!--[if mso]>-->
|
|
+ <td>foo</td>
|
|
+ <!--<!endif]-->
|
|
+ <td>bar</td>
|
|
+ </table>
|
|
+ '''
|
|
+ )
|
|
+ )
|
|
+
|
|
+ def test_stress_comment_handling(self):
|
|
+ """Stress test the comment handling."""
|
|
+
|
|
+ self.assertMarkdownRenders(
|
|
+ self.dedent(
|
|
+ '''
|
|
+ `</` <!-- `<!--[if mso]>` and <!-- </> and `<!--[if mso]>`
|
|
+
|
|
+ <!-- and <!-- `<!--[if mso]>` and </> `</` and `<!--[if mso]>`
|
|
+
|
|
+ <!-- Real comment -->
|
|
+
|
|
+ `<!--[if mso]>` `</` `<!--[if mso]>` and </> <!-- and <!--
|
|
+
|
|
+ </> `<!--[if mso]>` `</` <!-- and <!-- and `<!--[if mso]>`
|
|
+ '''
|
|
+ ),
|
|
+ self.dedent(
|
|
+ '''
|
|
+ <p><code></</code> <!-- <code><!--[if mso]></code> and <!-- </> and <code><!--[if mso]></code></p>
|
|
+ <p><!-- and <!-- <code><!--[if mso]></code> and </> <code></</code> and <code><!--[if mso]></code></p>
|
|
+ <!-- Real comment -->
|
|
+ <p><code><!--[if mso]></code> <code></</code> <code><!--[if mso]></code> and </> <!-- and <!--</p>
|
|
+ <p></> <code><!--[if mso]></code> <code></</code> <!-- and <!-- and <code><!--[if mso]></code></p>
|
|
+ ''' # noqa: E501
|
|
+ )
|
|
+ )
|
|
+
|
|
+ def test_unclosed_endtag(self):
|
|
+ """Ensure unclosed end tag does not have side effects."""
|
|
+
|
|
+ self.assertMarkdownRenders(
|
|
+ self.dedent(
|
|
+ '''
|
|
+ `</`
|
|
+
|
|
+ <div>
|
|
+ <!--[if mso]>-->
|
|
+ <p>foo</p>
|
|
+ <!--<!endif]-->
|
|
+ </div>
|
|
+ '''
|
|
+ ),
|
|
+ self.dedent(
|
|
+ '''
|
|
+ <p><code></</code></p>
|
|
+ <div>
|
|
+ <!--[if mso]>-->
|
|
+ <p>foo</p>
|
|
+ <!--<!endif]-->
|
|
+ </div>
|
|
+ '''
|
|
+ )
|
|
+ )
|
|
--
|
|
2.54.0
|
|
|
|
|
|
From 719b1921b528706c87218d6324c565b4627bbe4f Mon Sep 17 00:00:00 2001
|
|
From: David King <dking@redhat.com>
|
|
Date: Mon, 11 May 2026 08:03:13 +0100
|
|
Subject: [PATCH 4/4] Extra fix for failing tests
|
|
|
|
---
|
|
markdown/htmlparser.py | 17 +++++++++++++++++
|
|
1 file changed, 17 insertions(+)
|
|
|
|
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
|
|
index 08f5a32..bb64f40 100644
|
|
--- a/markdown/htmlparser.py
|
|
+++ b/markdown/htmlparser.py
|
|
@@ -70,6 +70,23 @@ htmlparser.locatestarttagend_tolerant = re.compile(r"""
|
|
)?
|
|
\s* # trailing whitespace
|
|
""", re.VERBOSE)
|
|
+# Monkeypatch `locatetagend` if it exists (Python 3.14+) to also exclude backticks.
|
|
+# `check_for_whole_start_tag` uses `locatetagend` instead of `locatestarttagend_tolerant` on 3.14+.
|
|
+if hasattr(htmlparser, 'locatetagend'):
|
|
+ htmlparser.locatetagend = re.compile(r"""
|
|
+ [a-zA-Z][^`\t\n\r\f />]* # tag name <= added backtick
|
|
+ [\t\n\r\f /]* # optional whitespace before attribute name
|
|
+ (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^`\t\n\r\f /=>]* # attribute name <= added backtick
|
|
+ (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
|
|
+ (?:'[^']*' # LITA-enclosed value
|
|
+ |"[^"]*" # LIT-enclosed value
|
|
+ |(?!['"])[^`>\t\n\r\f ]* # bare value <= added backtick
|
|
+ )
|
|
+ )?
|
|
+ [\t\n\r\f /]* # possibly followed by a space
|
|
+ )*
|
|
+ >?
|
|
+ """, re.VERBOSE)
|
|
|
|
# Match a blank line at the start of a block of text (two newlines).
|
|
# The newlines may be preceded by additional whitespace.
|
|
--
|
|
2.54.0
|
|
|