385 lines
15 KiB
Diff
385 lines
15 KiB
Diff
From 6c97acbb39693b94606b499f0c472fba2f5fd274 Mon Sep 17 00:00:00 2001
|
||
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= <thrnciar@redhat.com>
|
||
Date: Tue, 20 Aug 2024 10:44:06 +0200
|
||
Subject: [PATCH] 00435: gh-121650: Encode newlines in headers, and verify
|
||
headers are sound (GH-122233)
|
||
|
||
Per RFC 2047:
|
||
|
||
> [...] these encoding schemes allow the
|
||
> encoding of arbitrary octet values, mail readers that implement this
|
||
> decoding should also ensure that display of the decoded data on the
|
||
> recipient's terminal will not cause unwanted side-effects
|
||
|
||
It seems that the "encoded-word" scheme is a valid way to include
|
||
a newline character in a header value, just like we already allow
|
||
undecodable bytes or control characters.
|
||
They do need to be properly quoted when serialized to text, though.
|
||
|
||
This should fail for custom fold() implementations that aren't careful
|
||
about newlines.
|
||
|
||
(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384)
|
||
|
||
This patch also contains modified commit cherry picked from
|
||
c5bba853d5e7836f6d4340e18721d3fb3a6ee0f7.
|
||
|
||
This commit was backported to simplify the backport of the other commit
|
||
fixing the CVE. The only modification is the removal of one test case which
|
||
tests multiple changes in Python 3.7 and it wasn't working properly
|
||
with Python 3.6 where we backported only one change.
|
||
|
||
Co-authored-by: Petr Viktorin <encukou@gmail.com>
|
||
Co-authored-by: Bas Bloemsaat <bas@bloemsaat.org>
|
||
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
|
||
Co-authored-by: bsiem <52461103+bsiem@users.noreply.github.com>
|
||
---
|
||
Doc/library/email.errors.rst | 6 ++
|
||
Doc/library/email.policy.rst | 18 ++++++
|
||
Lib/email/_header_value_parser.py | 9 +++
|
||
Lib/email/_policybase.py | 8 +++
|
||
Lib/email/errors.py | 4 ++
|
||
Lib/email/generator.py | 16 ++++-
|
||
Lib/test/test_email/test_generator.py | 62 +++++++++++++++++++
|
||
Lib/test/test_email/test_headerregistry.py | 16 +++++
|
||
Lib/test/test_email/test_policy.py | 26 ++++++++
|
||
.../2019-07-09-11-20-21.bpo-37482.auzvev.rst | 1 +
|
||
...-07-27-16-10-41.gh-issue-121650.nf6oc9.rst | 5 ++
|
||
11 files changed, 170 insertions(+), 1 deletion(-)
|
||
create mode 100644 Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst
|
||
create mode 100644 Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||
|
||
diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
|
||
index 511ad16..7e51f74 100644
|
||
--- a/Doc/library/email.errors.rst
|
||
+++ b/Doc/library/email.errors.rst
|
||
@@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module:
|
||
:class:`~email.mime.image.MIMEImage`).
|
||
|
||
|
||
+.. exception:: HeaderWriteError()
|
||
+
|
||
+ Raised when an error occurs while the :mod:`~email.generator` outputs
|
||
+ headers.
|
||
+
|
||
+
|
||
Here is the list of the defects that the :class:`~email.parser.FeedParser`
|
||
can find while parsing messages. Note that the defects are added to the message
|
||
where the problem was found, so for example, if a message nested inside a
|
||
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
|
||
index 8e70762..8617b2e 100644
|
||
--- a/Doc/library/email.policy.rst
|
||
+++ b/Doc/library/email.policy.rst
|
||
@@ -229,6 +229,24 @@ added matters. To illustrate::
|
||
|
||
.. versionadded:: 3.6
|
||
|
||
+
|
||
+ .. attribute:: verify_generated_headers
|
||
+
|
||
+ If ``True`` (the default), the generator will raise
|
||
+ :exc:`~email.errors.HeaderWriteError` instead of writing a header
|
||
+ that is improperly folded or delimited, such that it would
|
||
+ be parsed as multiple headers or joined with adjacent data.
|
||
+ Such headers can be generated by custom header classes or bugs
|
||
+ in the ``email`` module.
|
||
+
|
||
+ As it's a security feature, this defaults to ``True`` even in the
|
||
+ :class:`~email.policy.Compat32` policy.
|
||
+ For backwards compatible, but unsafe, behavior, it must be set to
|
||
+ ``False`` explicitly.
|
||
+
|
||
+ .. versionadded:: 3.8.20
|
||
+
|
||
+
|
||
The following :class:`Policy` method is intended to be called by code using
|
||
the email library to create policy instances with custom settings:
|
||
|
||
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
|
||
index 9815e4e..dab4cbb 100644
|
||
--- a/Lib/email/_header_value_parser.py
|
||
+++ b/Lib/email/_header_value_parser.py
|
||
@@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP
|
||
ASPECIALS = TSPECIALS | set("*'%")
|
||
ATTRIBUTE_ENDS = ASPECIALS | WSP
|
||
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
|
||
+NLSET = {'\n', '\r'}
|
||
+SPECIALSNL = SPECIALS | NLSET
|
||
|
||
def quote_string(value):
|
||
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
|
||
@@ -2608,6 +2610,13 @@ def _refold_parse_tree(parse_tree, *, policy):
|
||
wrap_as_ew_blocked -= 1
|
||
continue
|
||
tstr = str(part)
|
||
+ if not want_encoding:
|
||
+ if part.token_type == 'ptext':
|
||
+ # Encode if tstr contains special characters.
|
||
+ want_encoding = not SPECIALSNL.isdisjoint(tstr)
|
||
+ else:
|
||
+ # Encode if tstr contains newlines.
|
||
+ want_encoding = not NLSET.isdisjoint(tstr)
|
||
try:
|
||
tstr.encode(encoding)
|
||
charset = encoding
|
||
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
|
||
index c9cbadd..d1f4821 100644
|
||
--- a/Lib/email/_policybase.py
|
||
+++ b/Lib/email/_policybase.py
|
||
@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
|
||
message_factory -- the class to use to create new message objects.
|
||
If the value is None, the default is Message.
|
||
|
||
+ verify_generated_headers
|
||
+ -- if true, the generator verifies that each header
|
||
+ is properly folded, so that a parser won't
|
||
+ treat it as multiple headers, start-of-body, or
|
||
+ part of another header.
|
||
+ This is a check against custom Header & fold()
|
||
+ implementations.
|
||
"""
|
||
|
||
raise_on_defect = False
|
||
@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
|
||
max_line_length = 78
|
||
mangle_from_ = False
|
||
message_factory = None
|
||
+ verify_generated_headers = True
|
||
|
||
def handle_defect(self, obj, defect):
|
||
"""Based on policy, either raise defect or call register_defect.
|
||
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
|
||
index d28a680..1a0d5c6 100644
|
||
--- a/Lib/email/errors.py
|
||
+++ b/Lib/email/errors.py
|
||
@@ -29,6 +29,10 @@ class CharsetError(MessageError):
|
||
"""An illegal charset was given."""
|
||
|
||
|
||
+class HeaderWriteError(MessageError):
|
||
+ """Error while writing headers."""
|
||
+
|
||
+
|
||
# These are parsing defects which the parser was able to work around.
|
||
class MessageDefect(ValueError):
|
||
"""Base class for a message defect."""
|
||
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
|
||
index ae670c2..6deb95b 100644
|
||
--- a/Lib/email/generator.py
|
||
+++ b/Lib/email/generator.py
|
||
@@ -14,12 +14,14 @@ import random
|
||
from copy import deepcopy
|
||
from io import StringIO, BytesIO
|
||
from email.utils import _has_surrogates
|
||
+from email.errors import HeaderWriteError
|
||
|
||
UNDERSCORE = '_'
|
||
NL = '\n' # XXX: no longer used by the code below.
|
||
|
||
NLCRE = re.compile(r'\r\n|\r|\n')
|
||
fcre = re.compile(r'^From ', re.MULTILINE)
|
||
+NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
|
||
|
||
|
||
|
||
@@ -219,7 +221,19 @@ class Generator:
|
||
|
||
def _write_headers(self, msg):
|
||
for h, v in msg.raw_items():
|
||
- self.write(self.policy.fold(h, v))
|
||
+ folded = self.policy.fold(h, v)
|
||
+ if self.policy.verify_generated_headers:
|
||
+ linesep = self.policy.linesep
|
||
+ if not folded.endswith(self.policy.linesep):
|
||
+ raise HeaderWriteError(
|
||
+ f'folded header does not end with {linesep!r}: {folded!r}')
|
||
+ folded_no_linesep = folded
|
||
+ if folded.endswith(linesep):
|
||
+ folded_no_linesep = folded[:-len(linesep)]
|
||
+ if NEWLINE_WITHOUT_FWSP.search(folded_no_linesep):
|
||
+ raise HeaderWriteError(
|
||
+ f'folded header contains newline: {folded!r}')
|
||
+ self.write(folded)
|
||
# A blank line always separates headers from body
|
||
self.write(self._NL)
|
||
|
||
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
|
||
index c1aeaef..cdf1075 100644
|
||
--- a/Lib/test/test_email/test_generator.py
|
||
+++ b/Lib/test/test_email/test_generator.py
|
||
@@ -5,6 +5,7 @@ from email import message_from_string, message_from_bytes
|
||
from email.message import EmailMessage
|
||
from email.generator import Generator, BytesGenerator
|
||
from email import policy
|
||
+import email.errors
|
||
from test.test_email import TestEmailBase, parameterize
|
||
|
||
|
||
@@ -215,6 +216,44 @@ class TestGeneratorBase:
|
||
g.flatten(msg)
|
||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||
|
||
+ def test_keep_encoded_newlines(self):
|
||
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
|
||
+ To: nobody
|
||
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||
+
|
||
+ None
|
||
+ """)))
|
||
+ expected = textwrap.dedent("""\
|
||
+ To: nobody
|
||
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||
+
|
||
+ None
|
||
+ """)
|
||
+ s = self.ioclass()
|
||
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=80))
|
||
+ g.flatten(msg)
|
||
+ self.assertEqual(s.getvalue(), self.typ(expected))
|
||
+
|
||
+ def test_keep_long_encoded_newlines(self):
|
||
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
|
||
+ To: nobody
|
||
+ Subject: Bad subject =?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||
+
|
||
+ None
|
||
+ """)))
|
||
+ expected = textwrap.dedent("""\
|
||
+ To: nobody
|
||
+ Subject: Bad subject \n\
|
||
+ =?utf-8?q?=0A?=Bcc:
|
||
+ injection@example.com
|
||
+
|
||
+ None
|
||
+ """)
|
||
+ s = self.ioclass()
|
||
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
|
||
+ g.flatten(msg)
|
||
+ self.assertEqual(s.getvalue(), self.typ(expected))
|
||
+
|
||
|
||
class TestGenerator(TestGeneratorBase, TestEmailBase):
|
||
|
||
@@ -223,6 +262,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
|
||
ioclass = io.StringIO
|
||
typ = str
|
||
|
||
+ def test_verify_generated_headers(self):
|
||
+ """gh-121650: by default the generator prevents header injection"""
|
||
+ class LiteralHeader(str):
|
||
+ name = 'Header'
|
||
+ def fold(self, **kwargs):
|
||
+ return self
|
||
+
|
||
+ for text in (
|
||
+ 'Value\r\nBad Injection\r\n',
|
||
+ 'NoNewLine'
|
||
+ ):
|
||
+ with self.subTest(text=text):
|
||
+ message = message_from_string(
|
||
+ "Header: Value\r\n\r\nBody",
|
||
+ policy=self.policy,
|
||
+ )
|
||
+
|
||
+ del message['Header']
|
||
+ message['Header'] = LiteralHeader(text)
|
||
+
|
||
+ with self.assertRaises(email.errors.HeaderWriteError):
|
||
+ message.as_string()
|
||
+
|
||
|
||
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
|
||
|
||
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
|
||
index 30ce0ba..d5004b3 100644
|
||
--- a/Lib/test/test_email/test_headerregistry.py
|
||
+++ b/Lib/test/test_email/test_headerregistry.py
|
||
@@ -1527,6 +1527,22 @@ class TestAddressAndGroup(TestEmailBase):
|
||
|
||
class TestFolding(TestHeaderBase):
|
||
|
||
+ def test_address_display_names(self):
|
||
+ """Test the folding and encoding of address headers."""
|
||
+ for name, result in (
|
||
+ ('Foo Bar, France', '"Foo Bar, France"'),
|
||
+ ('Foo Bar (France)', '"Foo Bar (France)"'),
|
||
+ ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='),
|
||
+ ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='),
|
||
+ ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='),
|
||
+ ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='),
|
||
+ ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'),
|
||
+ ('Foo Bär <France>', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='),
|
||
+ ):
|
||
+ h = self.make_header('To', Address(name, addr_spec='a@b.com'))
|
||
+ self.assertEqual(h.fold(policy=policy.default),
|
||
+ 'To: %s <a@b.com>\n' % result)
|
||
+
|
||
def test_short_unstructured(self):
|
||
h = self.make_header('subject', 'this is a test')
|
||
self.assertEqual(h.fold(policy=policy.default),
|
||
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
|
||
index 8fecb8a..6793422 100644
|
||
--- a/Lib/test/test_email/test_policy.py
|
||
+++ b/Lib/test/test_email/test_policy.py
|
||
@@ -25,6 +25,7 @@ class PolicyAPITests(unittest.TestCase):
|
||
'raise_on_defect': False,
|
||
'mangle_from_': True,
|
||
'message_factory': None,
|
||
+ 'verify_generated_headers': True,
|
||
}
|
||
# These default values are the ones set on email.policy.default.
|
||
# If any of these defaults change, the docs must be updated.
|
||
@@ -237,6 +238,31 @@ class PolicyAPITests(unittest.TestCase):
|
||
email.policy.EmailPolicy.header_factory)
|
||
self.assertEqual(newpolicy.__dict__, {'raise_on_defect': True})
|
||
|
||
+ def test_verify_generated_headers(self):
|
||
+ """Turning protection off allows header injection"""
|
||
+ policy = email.policy.default.clone(verify_generated_headers=False)
|
||
+ for text in (
|
||
+ 'Header: Value\r\nBad: Injection\r\n',
|
||
+ 'Header: NoNewLine'
|
||
+ ):
|
||
+ with self.subTest(text=text):
|
||
+ message = email.message_from_string(
|
||
+ "Header: Value\r\n\r\nBody",
|
||
+ policy=policy,
|
||
+ )
|
||
+ class LiteralHeader(str):
|
||
+ name = 'Header'
|
||
+ def fold(self, **kwargs):
|
||
+ return self
|
||
+
|
||
+ del message['Header']
|
||
+ message['Header'] = LiteralHeader(text)
|
||
+
|
||
+ self.assertEqual(
|
||
+ message.as_string(),
|
||
+ f"{text}\nBody",
|
||
+ )
|
||
+
|
||
# XXX: Need subclassing tests.
|
||
# For adding subclassed objects, make sure the usual rules apply (subclass
|
||
# wins), but that the order still works (right overrides left).
|
||
diff --git a/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst
|
||
new file mode 100644
|
||
index 0000000..e09ff63
|
||
--- /dev/null
|
||
+++ b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst
|
||
@@ -0,0 +1 @@
|
||
+Fix serialization of display name in originator or destination address fields with both encoded words and special chars.
|
||
diff --git a/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||
new file mode 100644
|
||
index 0000000..83dd28d
|
||
--- /dev/null
|
||
+++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||
@@ -0,0 +1,5 @@
|
||
+:mod:`email` headers with embedded newlines are now quoted on output. The
|
||
+:mod:`~email.generator` will now refuse to serialize (write) headers that
|
||
+are unsafely folded or delimited; see
|
||
+:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas
|
||
+Bloemsaat and Petr Viktorin in :gh:`121650`.)
|
||
--
|
||
2.45.2
|
||
|