From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 7 Nov 2022 19:22:14 -0800 Subject: [PATCH] 00394-cve-2022-45061-cpu-denial-of-service-via-inefficient-idna-decoder.patch 00394 # gh-98433: Fix quadratic time idna decoding. There was an unnecessary quadratic loop in idna decoding. This restores the behavior to linear. Backported from python3. (cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15) Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Co-authored-by: Gregory P. Smith --- Lib/encodings/idna.py | 32 +++++++++---------- Lib/test/test_codecs.py | 6 ++++ ...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 6 ++++ 3 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index ea90d67142f..2ce798cf47e 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -39,23 +39,21 @@ def nameprep(label): # Check bidi RandAL = map(stringprep.in_table_d1, label) - for c in RandAL: - if c: - # There is a RandAL char in the string. Must perform further - # tests: - # 1) The characters in section 5.8 MUST be prohibited. - # This is table C.8, which was already checked - # 2) If a string contains any RandALCat character, the string - # MUST NOT contain any LCat character. - if filter(stringprep.in_table_d2, label): - raise UnicodeError("Violation of BIDI requirement 2") - - # 3) If a string contains any RandALCat character, a - # RandALCat character MUST be the first character of the - # string, and a RandALCat character MUST be the last - # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") + if any(RandAL): + # There is a RandAL char in the string. Must perform further + # tests: + # 1) The characters in section 5.8 MUST be prohibited. + # This is table C.8, which was already checked + # 2) If a string contains any RandALCat character, the string + # MUST NOT contain any LCat character. + if any(stringprep.in_table_d2(x) for x in label): + raise UnicodeError("Violation of BIDI requirement 2") + # 3) If a string contains any RandALCat character, a + # RandALCat character MUST be the first character of the + # string, and a RandALCat character MUST be the last + # character of the string. + if not RandAL[0] or not RandAL[-1]: + raise UnicodeError("Violation of BIDI requirement 3") return label diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0ec8bf5a4b4..76428e1794a 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1318,6 +1318,12 @@ class IDNACodecTest(unittest.TestCase): self.assertEqual(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org") self.assertEqual(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.") + def test_builtin_decode_length_limit(self): + with self.assertRaisesRegexp(UnicodeError, "too long"): + (b"xn--016c"+b"a"*1100).decode("idna") + with self.assertRaisesRegexp(UnicodeError, "too long"): + (b"xn--016c"+b"a"*70).decode("idna") + def test_stream(self): import StringIO r = codecs.getreader("idna")(StringIO.StringIO("abc")) diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst new file mode 100644 index 00000000000..5185fac2e29 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst @@ -0,0 +1,6 @@ +The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio` +related name resolution functions no longer involves a quadratic algorithm. +This prevents a potential CPU denial of service if an out-of-spec excessive +length hostname involving bidirectional characters were decoded. Some protocols +such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker +to supply such a name. diff -urNp a/Lib/encodings/idna.py b/Lib/encodings/idna.py --- a/Lib/encodings/idna.py 2023-02-16 08:58:06.884171667 +0100 +++ b/Lib/encodings/idna.py 2023-02-16 08:59:31.931296399 +0100 @@ -101,6 +101,16 @@ def ToASCII(label): raise UnicodeError("label empty or too long") def ToUnicode(label): + if len(label) > 1024: + # Protection from https://github.com/python/cpython/issues/98433. + # https://datatracker.ietf.org/doc/html/rfc5894#section-6 + # doesn't specify a label size limit prior to NAMEPREP. But having + # one makes practical sense. + # This leaves ample room for nameprep() to remove Nothing characters + # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still + # preventing us from wasting time decoding a big thing that'll just + # hit the actual <= 63 length limit in Step 6. + raise UnicodeError("label way too long") # Step 1: Check for ASCII if isinstance(label, str): pure_ascii = True