From d5e3238b87fc557600618f18179e821a4a1c7577 Mon Sep 17 00:00:00 2001
From: Lumir Balhar
Date: Tue, 29 Jun 2021 16:03:37 +0200
Subject: [PATCH] CVE-2021-33503

---
 src/urllib3/util/url.py |  8 +++++---
 test/test_util.py       | 10 ++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
index 8ef5a23..7fb2650 100644
--- a/src/urllib3/util/url.py
+++ b/src/urllib3/util/url.py
@@ -63,12 +63,12 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
 BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
 ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
 
-SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
     REG_NAME_PAT,
     IPV4_PAT,
     IPV6_ADDRZ_PAT,
 )
-SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
+_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
 
 UNRESERVED_CHARS = set(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
@@ -365,7 +365,9 @@ def parse_url(url):
         scheme = scheme.lower()
 
     if authority:
-        auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
+        auth, _, host_port = authority.rpartition("@")
+        auth = auth or None
+        host, port = _HOST_PORT_RE.match(host_port).groups()
         if auth and normalize_uri:
             auth = _encode_invalid_chars(auth, USERINFO_CHARS)
         if port == "":
diff --git a/test/test_util.py b/test/test_util.py
index 42c3882..04c90b0 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -425,6 +425,16 @@ class TestUtil(object):
                 query="%0D%0ASET%20test%20failure12%0D%0A:8080/test/?test=a",
             ),
         ),
+        # Tons of '@' causing backtracking
+        ("https://" + ("@" * 10000) + "[", False),
+        (
+            "https://user:" + ("@" * 10000) + "example.com",
+            Url(
+                scheme="https",
+                auth="user:" + ("%40" * 9999),
+                host="example.com",
+            ),
+        ),
     ]
 
     @pytest.mark.parametrize("url, expected_url", url_vulnerabilities)
-- 
2.31.1