From 78da9e020385fe78e36c20f99a0910bbc4a0c100 Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Thu, 1 Apr 2021 08:18:07 +0200
Subject: [PATCH] CVE-2021-23336: Add `separator` argument to parse_qs; warn
 with default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
However, this solution is different from the upstream solution in Python 3.6.13.

An optional argument, separator, is added to specify the separator.
It is recommended to set it to '&' or ';' to match the application or proxy in use.
The default can be set with an env variable or a config file.
If neither the argument, the env var, nor the config file specifies a separator, "&" is used,
but a warning is raised if parse_qs is used on input that contains ';'.
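
For illustration only (not part of the change itself), a rough sketch of how the
patched functions are expected to behave, based on the diff below and assuming
no /etc/python/urllib.cfg exists on the system:

    import os
    import warnings
    import urllib.parse

    # Explicit separator: split only on ';'
    urllib.parse.parse_qs('a=1;b=2', separator=';')  # {'a': ['1'], 'b': ['2']}

    # Default separator: only '&' splits, and a ';' in the input triggers
    # the (private) _QueryStringSeparatorWarning
    with warnings.catch_warnings(record=True) as w:
        urllib.parse.parse_qs('a=1;b=2')             # {'a': ['1;b=2']}
    print(w[0].category.__name__)                    # _QueryStringSeparatorWarning

    # Environment variable selects the default: '&', ';', or 'legacy'
    os.environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
    urllib.parse.parse_qs('a=1;b=2&c=3')             # {'a': ['1'], 'b': ['2'], 'c': ['3']}

The same default can be put in /etc/python/urllib.cfg (a [parse_qs] section with a
PYTHON_URLLIB_QS_SEPARATOR key); the environment variable takes priority over the
config file, and an explicit separator argument takes priority over both.
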
Co-authors of the upstream change (who do not necessarily agree with this):
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Éric Araujo <merwok@netwok.org>
---
 Doc/library/cgi.rst          |   2 +-
 Doc/library/urllib.parse.rst |  12 +-
 Lib/cgi.py                   |   4 +-
 Lib/test/test_cgi.py         |  29 +++++
 Lib/test/test_urlparse.py    | 232 ++++++++++++++++++++++++++++++++++-
 Lib/urllib/parse.py          |  78 +++++++++++-
 6 files changed, 339 insertions(+), 18 deletions(-)

diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 880074b..d8a6dc1 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -277,7 +277,7 @@ These are useful if you want more control, or if you want to employ some of the
 algorithms implemented in this module in other circumstances.

-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)

    Parse a query in the environment or from a file (the file defaults to
    ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index a6cfc5d..85b2448 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -165,7 +165,7 @@ or on combining URL components into a URL string.
       now raise :exc:`ValueError`.

-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)

    Parse a query string given as a string argument (data of type
    :mimetype:`application/x-www-form-urlencoded`). Data are returned as a
@@ -191,7 +191,13 @@ or on combining URL components into a URL string.
    *max_num_fields* fields read.

    The optional argument *separator* is the symbol to use for separating the
-   query arguments. It defaults to ``&``.
+   query arguments. It is recommended to set it to ``'&'`` or ``';'``.
+   It defaults to ``'&'``; a warning is raised if this default is used.
+   This default may be changed with the following environment variable settings:
+
+   - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
+   - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
+   - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)

    Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
    parameter set to ``True``) to convert such dictionaries into query
@@ -236,7 +242,7 @@ or on combining URL components into a URL string.
    *max_num_fields* fields read.

    The optional argument *separator* is the symbol to use for separating the
-   query arguments. It defaults to ``&``.
+   query arguments. It works as in :py:func:`parse_qs`.

    Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
    query strings.
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 1e880e5..d7b994b 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -116,7 +116,7 @@ log = initlog # The current logging function
 maxlen = 0

 def parse(fp=None, environ=os.environ, keep_blank_values=0,
-          strict_parsing=0, separator='&'):
+          strict_parsing=0, separator=None):
     """Parse a query in the environment or from a file (default stdin)

         Arguments, all optional:
@@ -319,7 +319,7 @@ class FieldStorage:
     def __init__(self, fp=None, headers=None, outerboundary=b'',
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
                  limit=None, encoding='utf-8', errors='replace',
-                 max_num_fields=None, separator='&'):
+                 max_num_fields=None, separator=None):
         """Constructor. Read multipart/* until last part.

         Arguments, all optional:
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 4e1506a..49b6926 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -180,6 +180,35 @@ Content-Length: 3

             env = {'QUERY_STRING': orig}
             fs = cgi.FieldStorage(environ=env)
+            if isinstance(expect, dict):
+                # test dict interface
+                self.assertEqual(len(expect), len(fs))
+                self.assertCountEqual(expect.keys(), fs.keys())
+                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
+                # test individual fields
+                for key in expect.keys():
+                    expect_val = expect[key]
+                    self.assertIn(key, fs)
+                    if len(expect_val) > 1:
+                        self.assertEqual(fs.getvalue(key), expect_val)
+                    else:
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
+
+    def test_separator(self):
+        parse_semicolon = [
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+            (";", ValueError("bad query field: ''")),
+            (";;", ValueError("bad query field: ''")),
+            ("=;a", ValueError("bad query field: 'a'")),
+            (";b=a", ValueError("bad query field: ''")),
+            ("b;=a", ValueError("bad query field: 'b'")),
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+        ]
+        for orig, expect in parse_semicolon:
+            env = {'QUERY_STRING': orig}
+            fs = cgi.FieldStorage(separator=';', environ=env)
             if isinstance(expect, dict):
                 # test dict interface
                 self.assertEqual(len(expect), len(fs))
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 0f99130..4e0d7e5 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -2,6 +2,11 @@ import sys
 import unicodedata
 import unittest
 import urllib.parse
+from test.support import EnvironmentVarGuard
+from warnings import catch_warnings
+import tempfile
+import contextlib
+import os.path

 RFC1808_BASE = "http://a/b/c/d;p?q#f"
 RFC2396_BASE = "http://a/b/c/d;p?q"
@@ -32,10 +37,34 @@ parse_qsl_test_cases = [
     (b"&a=b", [(b'a', b'b')]),
     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
+]
+
+parse_qsl_test_cases_semicolon = [
+    (";", []),
+    (";;", []),
+    (";a=b", [('a', 'b')]),
+    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+    ("a=1;a=2", [('a', '1'), ('a', '2')]),
+    (b";", []),
+    (b";;", []),
+    (b";a=b", [(b'a', b'b')]),
+    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+]
+
+parse_qsl_test_cases_legacy = [
+    (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
+    (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
+    (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
+]
+
+parse_qsl_test_cases_warn = [
     (";a=b", [(';a', 'b')]),
     ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
     (b";a=b", [(b';a', b'b')]),
     (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
+    ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
+    (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
 ]

 # Each parse_qs testcase is a two-tuple that contains
@@ -62,10 +91,37 @@ parse_qs_test_cases = [
     (b"&a=b", {b'a': [b'b']}),
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
+]
+
+parse_qs_test_cases_semicolon = [
+    (";", {}),
+    (";;", {}),
+    (";a=b", {'a': ['b']}),
+    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+    ("a=1;a=2", {'a': ['1', '2']}),
+    (b";", {}),
+    (b";;", {}),
+    (b";a=b", {b'a': [b'b']}),
+    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+    (b"a=1;a=2", {b'a': [b'1', b'2']}),
+]
+
+parse_qs_test_cases_legacy = [
+    ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
+    ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
+    ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
+    (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
+    (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
+    (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
+]
+
+parse_qs_test_cases_warn = [
     (";a=b", {';a': ['b']}),
     ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
     (b";a=b", {b';a': [b'b']}),
     (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
+    ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
+    (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
 ]

 class UrlParseTestCase(unittest.TestCase):
@@ -123,23 +179,57 @@ class UrlParseTestCase(unittest.TestCase):

     def test_qsl(self):
         for orig, expect in parse_qsl_test_cases:
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=True, separator="&")
             self.assertEqual(result, expect, "Error parsing %r" % orig)
             expect_without_blanks = [v for v in expect if len(v[1])]
-            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
+            result = urllib.parse.parse_qsl(orig, keep_blank_values=False, separator="&")
             self.assertEqual(result, expect_without_blanks,
                              "Error parsing %r" % orig)

     def test_qs(self):
         for orig, expect in parse_qs_test_cases:
-            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
+            result = urllib.parse.parse_qs(orig, keep_blank_values=True, separator="&")
             self.assertEqual(result, expect, "Error parsing %r" % orig)
             expect_without_blanks = {v: expect[v]
                                      for v in expect if len(expect[v][0])}
-            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
+            result = urllib.parse.parse_qs(orig, keep_blank_values=False, separator="&")
             self.assertEqual(result, expect_without_blanks,
                              "Error parsing %r" % orig)

+    def test_qs_default_warn(self):
+        for orig, expect in parse_qs_test_cases_warn:
+            with self.subTest(orig=orig, expect=expect):
+                with catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 1)
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
+
+    def test_qsl_default_warn(self):
+        for orig, expect in parse_qsl_test_cases_warn:
+            with self.subTest(orig=orig, expect=expect):
+                with catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 1)
+                self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
+
+    def test_default_qs_no_warnings(self):
+        for orig, expect in parse_qs_test_cases:
+            with self.subTest(orig=orig, expect=expect):
+                with catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qs(orig, keep_blank_values=True)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
+    def test_default_qsl_no_warnings(self):
+        for orig, expect in parse_qsl_test_cases:
+            with self.subTest(orig=orig, expect=expect):
+                with catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
     def test_roundtrips(self):
         str_cases = [
             ('file:///tmp/junk.txt',
@@ -919,8 +1009,8 @@ class UrlParseTestCase(unittest.TestCase):

     def test_parse_qsl_max_num_fields(self):
         with self.assertRaises(ValueError):
-            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
-        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
+            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10, separator='&')
+        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10, separator='&')

     def test_parse_qs_separator(self):
         parse_qs_semicolon_cases = [
@@ -964,6 +1054,136 @@ class UrlParseTestCase(unittest.TestCase):
                 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)

+    @contextlib.contextmanager
+    def _qsl_sep_config(self, sep):
+        """Context for the given parse_qsl default separator configured in config file"""
+        old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
+        urllib.parse._default_qs_separator = None
+        try:
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                filename = os.path.join(tmpdirname, 'conf.cfg')
+                with open(filename, 'w') as file:
+                    file.write(f'[parse_qs]\n')
+                    file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
+                urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
+                yield
+        finally:
+            urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
+            urllib.parse._default_qs_separator = None
+
+    def test_parse_qs_separator_semicolon(self):
+        for orig, expect in parse_qs_test_cases_semicolon:
+            with self.subTest(orig=orig, expect=expect, method='arg'):
+                result = urllib.parse.parse_qs(orig, separator=';')
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+            with self.subTest(orig=orig, expect=expect, method='env'):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
+                    result = urllib.parse.parse_qs(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+            with self.subTest(orig=orig, expect=expect, method='conf'):
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qs(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
+    def test_parse_qsl_separator_semicolon(self):
+        for orig, expect in parse_qsl_test_cases_semicolon:
+            with self.subTest(orig=orig, expect=expect, method='arg'):
+                result = urllib.parse.parse_qsl(orig, separator=';')
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+            with self.subTest(orig=orig, expect=expect, method='env'):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
+                    result = urllib.parse.parse_qsl(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+            with self.subTest(orig=orig, expect=expect, method='conf'):
+                with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qsl(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
+    def test_parse_qs_separator_legacy(self):
+        for orig, expect in parse_qs_test_cases_legacy:
+            with self.subTest(orig=orig, expect=expect, method='env'):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
+                    result = urllib.parse.parse_qs(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+            with self.subTest(orig=orig, expect=expect, method='conf'):
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qs(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
+    def test_parse_qsl_separator_legacy(self):
+        for orig, expect in parse_qsl_test_cases_legacy:
+            with self.subTest(orig=orig, expect=expect, method='env'):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
+                    result = urllib.parse.parse_qsl(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+            with self.subTest(orig=orig, expect=expect, method='conf'):
+                with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
+                    result = urllib.parse.parse_qsl(orig)
+                self.assertEqual(result, expect, "Error parsing %r" % orig)
+                self.assertEqual(len(w), 0)
+
+    def test_parse_qs_separator_bad_value_env_or_config(self):
+        for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
+            with self.subTest(bad_sep, method='env'):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
+                    with self.assertRaises(ValueError):
+                        urllib.parse.parse_qsl('a=1;b=2')
+            with self.subTest(bad_sep, method='conf'):
+                with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
+                    with self.assertRaises(ValueError):
+                        urllib.parse.parse_qsl('a=1;b=2')
+
+    def test_parse_qs_separator_bad_value_arg(self):
+        for bad_sep in True, {}, '':
+            with self.subTest(bad_sep):
+                with self.assertRaises(ValueError):
+                    urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
+
+    def test_parse_qs_separator_num_fields(self):
+        for qs, sep in (
+            ('a&b&c', '&'),
+            ('a;b;c', ';'),
+            ('a&b;c', 'legacy'),
+        ):
+            with self.subTest(qs=qs, sep=sep):
+                with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+                    if sep != 'legacy':
+                        with self.assertRaises(ValueError):
+                            urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
+                    if sep:
+                        environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
+                    with self.assertRaises(ValueError):
+                        urllib.parse.parse_qsl(qs, max_num_fields=2)
+
+    def test_parse_qs_separator_priority(self):
+        # env variable trumps config file
+        with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
+            result = urllib.parse.parse_qs('a=1!b=2~c=3')
+        self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
+        # argument trumps config file
+        with self._qsl_sep_config('~'):
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
+        self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
+        # argument trumps env variable
+        with EnvironmentVarGuard() as environ:
+            environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
+            result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
+        self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
+
+
     def test_urlencode_sequences(self):
         # Other tests incidentally urlencode things; test non-covered cases:
         # Sequence and object values.
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index f0d9d4d..70fc268 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
 """

 import re
+import os
 import sys
 import collections
 import warnings
@@ -660,7 +661,7 @@ def unquote(string, encoding='utf-8', errors='replace'):


 def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-             encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
+             encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
     """Parse a query given as a string argument.

     Arguments:
@@ -700,9 +701,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             parsed_result[name] = [value]
     return parsed_result

+class _QueryStringSeparatorWarning(RuntimeWarning):
+    """Warning for using default `separator` in parse_qs or parse_qsl"""
+
+# The default "separator" for parse_qsl can be specified in a config file.
+# It's cached after first read.
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
+_default_qs_separator = None

 def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-              encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
+              encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
     """Parse a query given as a string argument.

     Arguments:
@@ -731,20 +739,78 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
     Returns a list, as G-d intended.
     """
     qs, _coerce_result = _coerce_args(qs)
-    separator, _ = _coerce_args(separator)

-    if not separator or (not isinstance(separator, (str, bytes))):
+    if isinstance(separator, bytes):
+        separator = separator.decode('ascii')
+
+    if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
         raise ValueError("Separator must be of type string or bytes.")

+    # Used when both "&" and ";" act as separators. (Need a non-string value.)
+    _legacy = object()
+
+    if separator is None:
+        global _default_qs_separator
+        separator = _default_qs_separator
+        envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
+        if separator is None:
+            # Set default separator from environment variable
+            separator = os.environ.get(envvar_name)
+            config_source = 'environment variable'
+        if separator is None:
+            # Set default separator from the configuration file
+            try:
+                file = open(_QS_SEPARATOR_CONFIG_FILENAME)
+            except FileNotFoundError:
+                pass
+            else:
+                with file:
+                    import configparser
+                    config = configparser.ConfigParser(
+                        interpolation=None,
+                        comment_prefixes=('#', ),
+                    )
+                    config.read_file(file)
+                    separator = config.get('parse_qs', envvar_name, fallback=None)
+                _default_qs_separator = separator
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
+        if separator is None:
+            # The default is '&', but warn if not specified explicitly
+            if ';' in qs:
+                from warnings import warn
+                warn("The default separator of urllib.parse.parse_qsl and "
+                     + "parse_qs was changed to '&' to avoid a web cache "
+                     + "poisoning issue (CVE-2021-23336). "
+                     + "By default, semicolons no longer act as query field "
+                     + "separators. "
+                     + "See https://access.redhat.com/articles/5860431 for "
+                     + "more details.",
+                     _QueryStringSeparatorWarning, stacklevel=2)
+            separator = '&'
+        elif separator == 'legacy':
+            separator = _legacy
+        elif len(separator) != 1:
+            raise ValueError(
+                f'{envvar_name} (from {config_source}) must contain '
+                + '1 character, or "legacy". See '
+                + 'https://access.redhat.com/articles/5860431 for more details.'
+            )
+
     # If max_num_fields is defined then check that the number of fields
     # is less than max_num_fields. This prevents a memory exhaustion DOS
     # attack via post bodies with many fields.
     if max_num_fields is not None:
-        num_fields = 1 + qs.count(separator)
+        if separator is _legacy:
+            num_fields = 1 + qs.count('&') + qs.count(';')
+        else:
+            num_fields = 1 + qs.count(separator)
         if max_num_fields < num_fields:
             raise ValueError('Max number of fields exceeded')

-    pairs = [s1 for s1 in qs.split(separator)]
+    if separator is _legacy:
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+    else:
+        pairs = [s1 for s1 in qs.split(separator)]
     r = []
     for name_value in pairs:
         if not name_value and not strict_parsing:
--
2.31.1