From b92eb0d9ca0db4432f4c694399d2b69a4cd696ea Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Fri, 16 May 2025 13:56:07 +0000 Subject: [PATCH 2/2] httputil: Raise errors instead of logging in multipart/form-data parsing Backport of upstream PR: https://github.com/tornadoweb/tornado/pull/3497 Signed-off-by: Sergio Correia --- tornado/httputil.py | 30 +++++++++++------------------- tornado/test/httpserver_test.py | 4 ++-- tornado/test/httputil_test.py | 13 ++++++++----- tornado/web.py | 17 +++++++++++++---- 4 files changed, 34 insertions(+), 30 deletions(-) diff --git a/tornado/httputil.py b/tornado/httputil.py index ebdc805..090a977 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -34,7 +34,6 @@ import unicodedata from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from tornado.escape import native_str, parse_qs_bytes, utf8 -from tornado.log import gen_log from tornado.util import ObjectDict, unicode_type @@ -762,25 +761,22 @@ def parse_body_arguments( """ if content_type.startswith("application/x-www-form-urlencoded"): if headers and "Content-Encoding" in headers: - gen_log.warning( - "Unsupported Content-Encoding: %s", headers["Content-Encoding"] + raise HTTPInputError( + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"] ) - return try: # real charset decoding will happen in RequestHandler.decode_argument() uri_arguments = parse_qs_bytes(body, keep_blank_values=True) except Exception as e: - gen_log.warning("Invalid x-www-form-urlencoded body: %s", e) - uri_arguments = {} + raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) elif content_type.startswith("multipart/form-data"): if headers and "Content-Encoding" in headers: - gen_log.warning( - "Unsupported Content-Encoding: %s", headers["Content-Encoding"] + raise HTTPInputError( + "Unsupported Content-Encoding: %s" % headers["Content-Encoding"] ) - return try: fields = content_type.split(";") for field in fields: @@ -789,9 +785,9 @@ def parse_body_arguments( parse_multipart_form_data(utf8(v), body, arguments, files) break else: - raise ValueError("multipart boundary not found") + raise HTTPInputError("multipart boundary not found") except Exception as e: - gen_log.warning("Invalid multipart/form-data: %s", e) + raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e def parse_multipart_form_data( @@ -820,26 +816,22 @@ def parse_multipart_form_data( boundary = boundary[1:-1] final_boundary_index = data.rfind(b"--" + boundary + b"--") if final_boundary_index == -1: - gen_log.warning("Invalid multipart/form-data: no final boundary") - return + raise HTTPInputError("Invalid multipart/form-data: no final boundary found") parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n") for part in parts: if not part: continue eoh = part.find(b"\r\n\r\n") if eoh == -1: - gen_log.warning("multipart/form-data missing headers") - continue + raise HTTPInputError("multipart/form-data missing headers") headers = HTTPHeaders.parse(part[:eoh].decode("utf-8")) disp_header = headers.get("Content-Disposition", "") disposition, disp_params = _parse_header(disp_header) if disposition != "form-data" or not part.endswith(b"\r\n"): - gen_log.warning("Invalid multipart/form-data") - continue + raise HTTPInputError("Invalid multipart/form-data") value = part[eoh + 4 : -2] if not disp_params.get("name"): - gen_log.warning("multipart/form-data value missing name") - continue + raise HTTPInputError("multipart/form-data missing name") name = disp_params["name"] if disp_params.get("filename"): ctype = headers.get("Content-Type", "application/unknown") diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index 0b29a39..5d5fb13 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -1131,9 +1131,9 @@ class GzipUnsupportedTest(GzipBaseTest, AsyncHTTPTestCase): # Gzip support is opt-in; without it the server fails to parse # the body (but parsing form bodies is currently just a log message, # not a fatal error). - with ExpectLog(gen_log, "Unsupported Content-Encoding"): + with ExpectLog(gen_log, ".*Unsupported Content-Encoding"): response = self.post_gzip("foo=bar") - self.assertEqual(json_decode(response.body), {}) + self.assertEqual(response.code, 400) class StreamingChunkSizeTest(AsyncHTTPTestCase): diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index 975900a..9494d0c 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -12,7 +12,6 @@ from tornado.httputil import ( ) from tornado.escape import utf8, native_str from tornado.log import gen_log -from tornado.testing import ExpectLog from tornado.test.util import ignore_deprecation import copy @@ -195,7 +194,9 @@ Foo b"\n", b"\r\n" ) args, files = form_data_args() - with ExpectLog(gen_log, "multipart/form-data missing headers"): + with self.assertRaises( + HTTPInputError, msg="multipart/form-data missing headers" + ): parse_multipart_form_data(b"1234", data, args, files) self.assertEqual(files, {}) @@ -209,7 +210,7 @@ Foo b"\n", b"\r\n" ) args, files = form_data_args() - with ExpectLog(gen_log, "Invalid multipart/form-data"): + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"): parse_multipart_form_data(b"1234", data, args, files) self.assertEqual(files, {}) @@ -222,7 +223,7 @@ Foo--1234--""".replace( b"\n", b"\r\n" ) args, files = form_data_args() - with ExpectLog(gen_log, "Invalid multipart/form-data"): + with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"): parse_multipart_form_data(b"1234", data, args, files) self.assertEqual(files, {}) @@ -236,7 +237,9 @@ Foo b"\n", b"\r\n" ) args, files = form_data_args() - with ExpectLog(gen_log, "multipart/form-data value missing name"): + with self.assertRaises( + HTTPInputError, msg="multipart/form-data value missing name" + ): parse_multipart_form_data(b"1234", data, args, files) self.assertEqual(files, {}) diff --git a/tornado/web.py b/tornado/web.py index 0393964..8ec5601 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -1751,6 +1751,14 @@ class RequestHandler(object): try: if self.request.method not in self.SUPPORTED_METHODS: raise HTTPError(405) + + # If we're not in stream_request_body mode, this is the place where we parse the body. + if not _has_stream_request_body(self.__class__): + try: + self.request._parse_body() + except httputil.HTTPInputError as e: + raise HTTPError(400, "Invalid body: %s" % e) from e + self.path_args = [self.decode_argument(arg) for arg in args] self.path_kwargs = dict( (k, self.decode_argument(v, name=k)) for (k, v) in kwargs.items() @@ -1941,7 +1949,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool: def removeslash( - method: Callable[..., Optional[Awaitable[None]]] + method: Callable[..., Optional[Awaitable[None]]], ) -> Callable[..., Optional[Awaitable[None]]]: """Use this decorator to remove trailing slashes from the request path. @@ -1970,7 +1978,7 @@ def removeslash( def addslash( - method: Callable[..., Optional[Awaitable[None]]] + method: Callable[..., Optional[Awaitable[None]]], ) -> Callable[..., Optional[Awaitable[None]]]: """Use this decorator to add a missing trailing slash to the request path. @@ -2394,8 +2402,9 @@ class _HandlerDelegate(httputil.HTTPMessageDelegate): if self.stream_request_body: future_set_result_unless_cancelled(self.request._body_future, None) else: + # Note that the body gets parsed in RequestHandler._execute so it can be in + # the right exception handler scope. self.request.body = b"".join(self.chunks) - self.request._parse_body() self.execute() def on_connection_close(self) -> None: @@ -3267,7 +3276,7 @@ class GZipContentEncoding(OutputTransform): def authenticated( - method: Callable[..., Optional[Awaitable[None]]] + method: Callable[..., Optional[Awaitable[None]]], ) -> Callable[..., Optional[Awaitable[None]]]: """Decorate methods with this to require that the user be logged in. -- 2.49.0