From 377ecb4c3bb1bc5e196f13ed7418c3d4b6380afc Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Wed, 3 Jun 2026 13:02:33 +0200 Subject: [PATCH] CVE-2026-44432 * Avoid any decoding in `HTTPResponse.drain_conn` * Add a comment * Simplify `drain_conn` * Add tests * Add additional checks to the test * Fix full decompression on the 2nd small read from response using Brotli * Add a changelog entry * Inverse the order in the changelog entry * Mention `stream` call --- changelog/GHSA-mf9v-mfxr-j63j.bugfix.rst | 7 +++++++ src/urllib3/response.py | 17 +++++++++++------ test/test_response.py | 24 +++++++++++++++++++++--- 3 files changed, 39 insertions(+), 9 deletions(-) create mode 100644 changelog/GHSA-mf9v-mfxr-j63j.bugfix.rst diff --git a/changelog/GHSA-mf9v-mfxr-j63j.bugfix.rst b/changelog/GHSA-mf9v-mfxr-j63j.bugfix.rst new file mode 100644 index 0000000..ac70af8 --- /dev/null +++ b/changelog/GHSA-mf9v-mfxr-j63j.bugfix.rst @@ -0,0 +1,7 @@ +Fixed two high-severity security issues where decompression-bomb safeguards of the streaming API were bypassed: + + +1. When ``HTTPResponse.drain_conn()`` was called after the response had been read and decompressed partially. +2. During the second ``HTTPResponse.read(amt=N)`` or ``HTTPResponse.stream(amt=N)`` call when the response was decompressed using the official `Brotli `__ library. + +See `GHSA-mf9v-mfxr-j63j `__ for details. diff --git a/src/urllib3/response.py b/src/urllib3/response.py index 1357d65..76c56d2 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -480,13 +480,14 @@ class HTTPResponse(io.IOBase): Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. """ try: - self.read( - # Do not spend resources decoding the content unless - # decoding has already been initiated. - decode_content=self._has_decoded_content, - ) + self._raw_read() except (HTTPError, SocketError, BaseSSLError, HTTPException): pass + if self._has_decoded_content: + # `_raw_read` skips decompression, so we should clean up the + # decoder to avoid keeping unnecessary data in memory. + self._decoded_buffer = BytesQueueBuffer() + self._decoder = None @property def data(self): @@ -800,7 +801,11 @@ class HTTPResponse(io.IOBase): if amt is not None: cache_content = False - if self._decoder and self._decoder.has_unconsumed_tail: + if ( + self._decoder + and self._decoder.has_unconsumed_tail + and len(self._decoded_buffer) < amt + ): decoded_data = self._decode( b"", decode_content, diff --git a/test/test_response.py b/test/test_response.py index 597ce46..b0539e5 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -469,22 +469,33 @@ class TestResponse(object): pytest.skip(f"Proper {request.node.callspec.id} decoder is not available") name, compressed_data = data - limit = 1024 * 1024 # 1 MiB + limit1 = 1024 * 1024 # 1 MiB + # We test with two read calls because the second call may be + # able to use the internal buffer filled by the first call, and + # we want to ensure that full decompression is never triggered + # by the second call. The limit for the second call is lowered + # to make sure that the internal buffer is used for the Brotli + # case specifically https://github.com/google/brotli/issues/1396 + limit2 = 1024 # 1 KiB if read_method in ("read_chunked", "stream"): httplib_r = httplib.HTTPResponse(MockSock) # type: ignore[arg-type] + httplib_r.chunked = True + httplib_r.chunk_left = 1 httplib_r.fp = MockChunkedEncodingResponse([compressed_data]) # type: ignore[assignment] r = HTTPResponse( httplib_r, preload_content=False, headers={"transfer-encoding": "chunked", "content-encoding": name}, ) - next(getattr(r, read_method)(amt=limit, decode_content=True)) + for limit in (limit1, limit2): + next(getattr(r, read_method)(amt=limit, decode_content=True)) else: fp = BytesIO(compressed_data) r = HTTPResponse( fp, headers={"content-encoding": name}, preload_content=False ) - getattr(r, read_method)(amt=limit, decode_content=True) + for limit in (limit1, limit2): + getattr(r, read_method)(amt=limit, decode_content=True) # Check that the internal decoded buffer is empty unless brotli # is used. @@ -494,6 +505,13 @@ class TestResponse(object): if name != "br" or brotli.__name__ == "brotlicffi": assert len(r._decoded_buffer) == 0 + # Check that memory usage is still within the limit while the + # connection is being drained, meaning that the call does not + # decompress the whole content. + r.drain_conn() + assert r._decoder is None + assert len(r._decoded_buffer) == 0 + def test_multi_decoding_deflate_deflate(self): data = zlib.compress(zlib.compress(b"foo")) -- 2.54.0