diff --git a/SOURCES/00377-CVE-2022-0391.patch b/SOURCES/00377-CVE-2022-0391.patch new file mode 100644 index 0000000..aebe53d --- /dev/null +++ b/SOURCES/00377-CVE-2022-0391.patch @@ -0,0 +1,170 @@ +From 6c472d3a1d334d4eeb4a25eba7bf3b01611bf667 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Thu, 6 May 2021 09:56:01 -0700 +Subject: [PATCH] [3.6] bpo-43882 - urllib.parse should sanitize urls + containing ASCII newline and tabs (GH-25924) + +Co-authored-by: Gregory P. Smith +Co-authored-by: Serhiy Storchaka +(cherry picked from commit 76cd81d60310d65d01f9d7b48a8985d8ab89c8b4) +Co-authored-by: Senthil Kumaran +(cherry picked from commit 515a7bc4e13645d0945b46a8e1d9102b918cd407) + +Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> +--- + Doc/library/urllib.parse.rst | 13 +++++ + Lib/test/test_urlparse.py | 48 +++++++++++++++++++ + Lib/urllib/parse.py | 10 ++++ + .../2021-04-25-07-46-37.bpo-43882.Jpwx85.rst | 6 +++ + 4 files changed, 77 insertions(+) + create mode 100644 Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst + +diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst +index 3c2e37ef2093a..b717d7cc05b2e 100644 +--- a/Doc/library/urllib.parse.rst ++++ b/Doc/library/urllib.parse.rst +@@ -288,6 +288,9 @@ or on combining URL components into a URL string. + ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is + decomposed before parsing, no error will be raised. + ++ Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline ++ ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL. ++ + .. versionchanged:: 3.6 + Out-of-range port numbers now raise :exc:`ValueError`, instead of + returning :const:`None`. +@@ -296,6 +299,10 @@ or on combining URL components into a URL string. + Characters that affect netloc parsing under NFKC normalization will + now raise :exc:`ValueError`. + ++ .. 
versionchanged:: 3.6.14 ++ ASCII newline and tab characters are stripped from the URL. ++ ++.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser + + .. function:: urlunsplit(parts) + +@@ -633,6 +640,10 @@ task isn't already covered by the URL parsing functions above. + + .. seealso:: + ++ `WHATWG`_ - URL Living standard ++ Working Group for the URL Standard that defines URLs, domains, IP addresses, the ++ application/x-www-form-urlencoded format, and their API. ++ + :rfc:`3986` - Uniform Resource Identifiers + This is the current standard (STD66). Any changes to urllib.parse module + should conform to this. Certain deviations could be observed, which are +@@ -656,3 +667,5 @@ task isn't already covered by the URL parsing functions above. + + :rfc:`1738` - Uniform Resource Locators (URL) + This specifies the formal syntax and semantics of absolute URLs. ++ ++.. _WHATWG: https://url.spec.whatwg.org/ +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py +index e3088b2f39bd7..3509278a01694 100644 +--- a/Lib/test/test_urlparse.py ++++ b/Lib/test/test_urlparse.py +@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self): + with self.assertRaisesRegex(ValueError, "out of range"): + p.port + ++ def test_urlsplit_remove_unsafe_bytes(self): ++ # Remove ASCII tabs and newlines from input, for http common case scenario. 
++ url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "www.python.org") ++ self.assertEqual(p.path, "/javascript:alert('msg')/") ++ self.assertEqual(p.query, "query=something") ++ self.assertEqual(p.fragment, "fragment") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # Remove ASCII tabs and newlines from input as bytes, for http common case scenario. ++ url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"www.python.org") ++ self.assertEqual(p.path, b"/javascript:alert('msg')/") ++ self.assertEqual(p.query, b"query=something") ++ self.assertEqual(p.fragment, b"fragment") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # any scheme ++ url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # Remove ASCII tabs and newlines from input as bytes, any scheme. 
++ url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # Unsafe bytes is not returned from urlparse cache. ++ # scheme is stored after parsing, sending an scheme with unsafe bytes *will not* return an unsafe scheme ++ url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ scheme = "htt\nps" ++ for _ in range(2): ++ p = urllib.parse.urlsplit(url, scheme=scheme) ++ self.assertEqual(p.scheme, "https") ++ self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ + def test_attributes_bad_port(self): + """Check handling of invalid ports.""" + for bytes in (False, True): +diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py +index 66056bf589bf6..ac6e7a9cee0b9 100644 +--- a/Lib/urllib/parse.py ++++ b/Lib/urllib/parse.py +@@ -76,6 +76,9 @@ + '0123456789' + '+-.') + ++# Unsafe bytes to be removed per WHATWG spec ++_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] ++ + # XXX: Consider replacing with functools.lru_cache + MAX_CACHE_SIZE = 20 + _parse_cache = {} +@@ -409,6 +412,11 @@ def _checknetloc(netloc): + raise ValueError("netloc '" + netloc + "' contains invalid " + + "characters under NFKC normalization") + ++def _remove_unsafe_bytes_from_url(url): ++ for b in _UNSAFE_URL_BYTES_TO_REMOVE: ++ url = url.replace(b, "") ++ return url ++ + def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + :///?# +@@ -416,6 +424,8 @@ def urlsplit(url, scheme='', allow_fragments=True): + Note that we don't break the components up in smaller bits + (e.g. 
netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) ++ url = _remove_unsafe_bytes_from_url(url) ++ scheme = _remove_unsafe_bytes_from_url(scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) +diff --git a/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst b/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst +new file mode 100644 +index 0000000000000..a326d079dff4a +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst +@@ -0,0 +1,6 @@ ++The presence of newline or tab characters in parts of a URL could allow ++some forms of attacks. ++ ++Following the controlling specification for URLs defined by WHATWG ++:func:`urllib.parse` now removes ASCII newlines and tabs from URLs, ++preventing such attacks. diff --git a/SOURCES/00378-support-expat-2-4-5.patch b/SOURCES/00378-support-expat-2-4-5.patch new file mode 100644 index 0000000..4b1e441 --- /dev/null +++ b/SOURCES/00378-support-expat-2-4-5.patch @@ -0,0 +1,98 @@ +From a5b78c6f1c802f6023bd4d7a248dc83be1eef6a3 Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping +Date: Mon, 21 Feb 2022 15:48:32 +0100 +Subject: [PATCH] 00378: Support expat 2.4.5 + +Curly brackets were never allowed in namespace URIs +according to RFC 3986, and so-called namespace-validating +XML parsers have the right to reject them as invalid URIs. + +libexpat >=2.4.5 has become stricter in that regard due to +related security issues; with ET.XML instantiating a +namespace-aware parser under the hood, this test has no +future in CPython. 
+ +References: +- https://datatracker.ietf.org/doc/html/rfc3986 +- https://www.w3.org/TR/xml-names/ + +Also, test_minidom.py: Support Expat >=2.4.5 + +Upstream: https://bugs.python.org/issue46811 + +Co-authored-by: Sebastian Pipping +--- + Lib/test/test_minidom.py | 12 +++++++++--- + Lib/test/test_xml_etree.py | 6 ------ + .../Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst | 1 + + 3 files changed, 10 insertions(+), 9 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst + +diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py +index d55e25e..e947382 100644 +--- a/Lib/test/test_minidom.py ++++ b/Lib/test/test_minidom.py +@@ -5,10 +5,12 @@ import pickle + from test import support + import unittest + ++import pyexpat + import xml.dom.minidom + + from xml.dom.minidom import parse, Node, Document, parseString + from xml.dom.minidom import getDOMImplementation ++from xml.parsers.expat import ExpatError + + + tstfile = support.findfile("test.xml", subdir="xmltestdata") +@@ -1156,8 +1158,10 @@ class MinidomTest(unittest.TestCase): + + # Verify that character decoding errors raise exceptions instead + # of crashing +- self.assertRaises(UnicodeDecodeError, parseString, +- b'Comment \xe7a va ? Tr\xe8s bien ?') ++ self.assertRaises(ExpatError, parseString, ++ b'') ++ self.assertRaises(ExpatError, parseString, ++ b'Comment \xe7a va ? 
Tr\xe8s bien ?') + + doc.unlink() + +@@ -1602,7 +1606,9 @@ class MinidomTest(unittest.TestCase): + self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE) + + def testExceptionOnSpacesInXMLNSValue(self): +- with self.assertRaisesRegex(ValueError, 'Unsupported syntax'): ++ context = self.assertRaisesRegex(ExpatError, 'syntax error') ++ ++ with context: + parseString('') + + def testDocRemoveChild(self): +diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py +index b01709e..acaa519 100644 +--- a/Lib/test/test_xml_etree.py ++++ b/Lib/test/test_xml_etree.py +@@ -1668,12 +1668,6 @@ class BugsTest(unittest.TestCase): + b"\n" + b'tãg') + +- def test_issue3151(self): +- e = ET.XML('') +- self.assertEqual(e.tag, '{${stuff}}localname') +- t = ET.ElementTree(e) +- self.assertEqual(ET.tostring(e), b'') +- + def test_issue6565(self): + elem = ET.XML("") + self.assertEqual(summarize_list(elem), ['tag']) +diff --git a/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst +new file mode 100644 +index 0000000..6969bd1 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst +@@ -0,0 +1 @@ ++Make test suite support Expat >=2.4.5 +-- +2.35.1 + diff --git a/SPECS/python3.spec b/SPECS/python3.spec index 785689a..74e07a4 100644 --- a/SPECS/python3.spec +++ b/SPECS/python3.spec @@ -14,7 +14,7 @@ URL: https://www.python.org/ # WARNING When rebasing to a new Python version, # remember to update the python3-docs package as well Version: %{pybasever}.8 -Release: 45%{?dist} +Release: 46%{?dist} License: Python @@ -651,6 +651,40 @@ Patch370: 00370-GIL-monotonic-clock.patch # Tracking bug: https://bugzilla.redhat.com/show_bug.cgi?id=2036020 Patch372: 00372-CVE-2021-4189.patch +# 00377 # +# CVE-2022-0391: urlparse does not sanitize URLs containing ASCII newline and tabs +# +# ASCII newline and tab characters are stripped from the URL. 
+# +# Upstream: https://bugs.python.org/issue43882 +# Tracking bug: https://bugzilla.redhat.com/show_bug.cgi?id=2047376 +Patch377: 00377-CVE-2022-0391.patch + +# 00378 # +# Support expat 2.4.5 +# +# Curly brackets were never allowed in namespace URIs +# according to RFC 3986, and so-called namespace-validating +# XML parsers have the right to reject them as invalid URIs. +# +# libexpat >=2.4.5 has become stricter in that regard due to +# related security issues; with ET.XML instantiating a +# namespace-aware parser under the hood, this test has no +# future in CPython. +# +# References: +# - https://datatracker.ietf.org/doc/html/rfc3986 +# - https://www.w3.org/TR/xml-names/ +# +# Also, test_minidom.py: Support Expat >=2.4.5 +# +# The patch has diverged from upstream as the python test +# suite was relying on checking the expat version, whereas +# in RHEL fixes get backported instead of rebasing packages. +# +# Upstream: https://bugs.python.org/issue46811 +Patch378: 00378-support-expat-2-4-5.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -990,6 +1024,8 @@ git apply %{PATCH351} %patch369 -p1 %patch370 -p1 %patch372 -p1 +%patch377 -p1 +%patch378 -p1 # Remove files that should be generated by the build # (This is after patching, so that we can use patches directly from upstream) @@ -1915,6 +1951,11 @@ fi # ====================================================== %changelog +* Wed Mar 09 2022 Charalampos Stratakis - 3.6.8-46 +- Security fix for CVE-2022-0391: urlparse does not sanitize URLs containing ASCII newline and tabs +- Fix the test suite support for Expat >= 2.4.5 +Resolves: rhbz#2047376, rhbz#2060435 + * Fri Jan 07 2022 Charalampos Stratakis - 3.6.8-45 - Security fix for CVE-2021-4189: ftplib should not use the host from the PASV response Resolves: rhbz#2036020