From 12283d0189eb03395f86903d993899dec092619b Mon Sep 17 00:00:00 2001 From: eabdullin Date: Mon, 26 Jan 2026 15:42:18 +0000 Subject: [PATCH] import UBI python3.12-urllib3-1.26.19-1.el9_7.1 --- .gitignore | 2 +- .python3.12-urllib3.metadata | 2 +- SOURCES/CVE-2025-66418.patch | 75 ++ SOURCES/CVE-2025-66471.patch | 1352 +++++++++++++++++++++++++++++++++ SOURCES/CVE-2026-21441.patch | 219 ++++++ SPECS/python3.12-urllib3.spec | 19 +- 6 files changed, 1664 insertions(+), 5 deletions(-) create mode 100644 SOURCES/CVE-2025-66418.patch create mode 100644 SOURCES/CVE-2025-66471.patch create mode 100644 SOURCES/CVE-2026-21441.patch diff --git a/.gitignore b/.gitignore index 992ae29..d81c293 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/urllib3-1.26.18.tar.gz +SOURCES/urllib3-1.26.19.tar.gz diff --git a/.python3.12-urllib3.metadata b/.python3.12-urllib3.metadata index 9d6125a..5071c4d 100644 --- a/.python3.12-urllib3.metadata +++ b/.python3.12-urllib3.metadata @@ -1 +1 @@ -bffc545083d4fe74a6ce953599c0c2c7012b1833 SOURCES/urllib3-1.26.18.tar.gz +05ec9b41b14659fd4350c8a220e1d31e79169e5a SOURCES/urllib3-1.26.19.tar.gz diff --git a/SOURCES/CVE-2025-66418.patch b/SOURCES/CVE-2025-66418.patch new file mode 100644 index 0000000..5276fc3 --- /dev/null +++ b/SOURCES/CVE-2025-66418.patch @@ -0,0 +1,75 @@ +From a5505e56f5bc1761beba7685309e6d7314a9588c Mon Sep 17 00:00:00 2001 +From: Illia Volochii +Date: Fri, 5 Dec 2025 16:41:33 +0200 +Subject: [PATCH] Security fix for CVE-2025-66418 + +* Add a hard-coded limit for the decompression chain + +* Reuse new list + +(cherry picked from commit 24d7b67eac89f94e11003424bcf0d8f7b72222a8) +--- + changelog/GHSA-gm62-xv2j-4w53.security.rst | 4 ++++ + src/urllib3/response.py | 12 +++++++++++- + test/test_response.py | 10 ++++++++++ + 3 files changed, 25 insertions(+), 1 deletion(-) + create mode 100644 changelog/GHSA-gm62-xv2j-4w53.security.rst + +diff --git a/changelog/GHSA-gm62-xv2j-4w53.security.rst b/changelog/GHSA-gm62-xv2j-4w53.security.rst +new file mode 100644 +index 00000000..6646eaa3 +--- /dev/null ++++ b/changelog/GHSA-gm62-xv2j-4w53.security.rst +@@ -0,0 +1,4 @@ ++Fixed a security issue where an attacker could compose an HTTP response with ++virtually unlimited links in the ``Content-Encoding`` header, potentially ++leading to a denial of service (DoS) attack by exhausting system resources ++during decoding. The number of allowed chained encodings is now limited to 5. +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index d0665533..f945c41c 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -224,8 +224,18 @@ class MultiDecoder(object): + they were applied. + """ + ++ # Maximum allowed number of chained HTTP encodings in the ++ # Content-Encoding header. ++ max_decode_links = 5 ++ + def __init__(self, modes): +- self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")] ++ encodings = [m.strip() for m in modes.split(",")] ++ if len(encodings) > self.max_decode_links: ++ raise DecodeError( ++ "Too many content encodings in the chain: " ++ f"{len(encodings)} > {self.max_decode_links}" ++ ) ++ self._decoders = [_get_decoder(e) for e in encodings] + + def flush(self): + return self._decoders[0].flush() +diff --git a/test/test_response.py b/test/test_response.py +index f949b2b1..33f570c4 100644 +--- a/test/test_response.py ++++ b/test/test_response.py +@@ -543,6 +543,16 @@ class TestResponse(object): + assert r.read(9 * 37) == b"foobarbaz" * 37 + assert r.read() == b"" + ++ def test_read_multi_decoding_too_many_links(self): ++ fp = BytesIO(b"foo") ++ with pytest.raises( ++ DecodeError, match="Too many content encodings in the chain: 6 > 5" ++ ): ++ HTTPResponse( ++ fp, ++ headers={"content-encoding": "gzip, deflate, br, zstd, gzip, deflate"}, ++ ) ++ + def test_body_blob(self): + resp = HTTPResponse(b"foo") + assert resp.data == b"foo" +-- +2.52.0 + diff --git a/SOURCES/CVE-2025-66471.patch b/SOURCES/CVE-2025-66471.patch new file mode 100644 index 0000000..935b431 --- /dev/null +++ b/SOURCES/CVE-2025-66471.patch @@ -0,0 +1,1352 @@ +From 9c69597d5add6fe64652f0c3c4de6b4878b5a939 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= +Date: Tue, 16 Dec 2025 12:54:12 +0100 +Subject: [PATCH 1/5] Add Python 3.12 to nox config + +--- + noxfile.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/noxfile.py b/noxfile.py +index 32366a57..3503de98 100644 +--- a/noxfile.py ++++ b/noxfile.py +@@ -45,7 +45,7 @@ def tests_impl(session, extras="socks,secure,brotli"): + ) + + +-@nox.session(python=["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "pypy"]) ++@nox.session(python=["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy"]) + def test(session): + tests_impl(session) + +-- +2.52.0 + + +From 033eb797df2bc488148c8d267d2c9fab1d802487 Mon Sep 17 00:00:00 2001 +From: Quentin Pradet +Date: Mon, 14 Nov 2022 16:54:47 +0400 +Subject: [PATCH 2/5] Standardize HTTPResponse.read(X) behavior regardless of + compression + +Co-authored-by: Franek Magiera +(cherry picked from commit c35033f6cc54106ca66ef2d48a9e3564d4fb0e07) +--- + changelog/2128.removal.rst | 7 + + src/urllib3/response.py | 167 +++++++++++++++++----- + test/test_response.py | 162 ++++++++++++++++++--- + test/with_dummyserver/test_socketlevel.py | 9 +- + 4 files changed, 278 insertions(+), 67 deletions(-) + create mode 100644 changelog/2128.removal.rst + +diff --git a/changelog/2128.removal.rst b/changelog/2128.removal.rst +new file mode 100644 +index 00000000..abf412f9 +--- /dev/null ++++ b/changelog/2128.removal.rst +@@ -0,0 +1,7 @@ ++Standardized :meth:`~urllib3.response.HTTPResponse.read` to respect the semantics of BufferedIOBase regardless of compression. Specifically, this method: ++ ++* only returns an empty bytes object to indicate EOF (that is, the response has been fully consumed), ++* never returns more bytes than requested, ++* can issue any number of system calls: zero, one or multiple. ++ ++If you want each :meth:`~urllib3.response.HTTPResponse.read` call to issue a single system call, you need to disable decompression by setting ``decode_content=False``. +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index 0bd13d40..9793de48 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -1,5 +1,6 @@ + from __future__ import absolute_import + ++import collections + import io + import logging + import sys +@@ -160,6 +161,63 @@ def _get_decoder(mode): + return DeflateDecoder() + + ++class BytesQueueBuffer: ++ """Memory-efficient bytes buffer ++ ++ To return decoded data in read() and still follow the BufferedIOBase API, we need a ++ buffer to always return the correct amount of bytes. ++ ++ This buffer should be filled using calls to put() ++ ++ Our maximum memory usage is determined by the sum of the size of: ++ ++ * self.buffer, which contains the full data ++ * the largest chunk that we will copy in get() ++ ++ The worst case scenario is a single chunk, in which case we'll make a full copy of ++ the data inside get(). ++ """ ++ ++ def __init__(self): ++ self.buffer = collections.deque() ++ self._size = 0 ++ ++ def __len__(self): ++ return self._size ++ ++ def put(self, data): ++ self.buffer.append(data) ++ self._size += len(data) ++ ++ def get(self, n): ++ if not self.buffer: ++ raise RuntimeError("buffer is empty") ++ elif n < 0: ++ raise ValueError("n should be > 0") ++ ++ fetched = 0 ++ ret = io.BytesIO() ++ while fetched < n: ++ remaining = n - fetched ++ chunk = self.buffer.popleft() ++ chunk_length = len(chunk) ++ if remaining < chunk_length: ++ left_chunk, right_chunk = chunk[:remaining], chunk[remaining:] ++ ret.write(left_chunk) ++ self.buffer.appendleft(right_chunk) ++ self._size -= remaining ++ break ++ else: ++ ret.write(chunk) ++ self._size -= chunk_length ++ fetched += chunk_length ++ ++ if not self.buffer: ++ break ++ ++ return ret.getvalue() ++ ++ + class HTTPResponse(io.IOBase): + """ + HTTP Response container. +@@ -261,6 +319,9 @@ class HTTPResponse(io.IOBase): + # Determine length of response + self.length_remaining = self._init_length(request_method) + ++ # Used to return the correct amount of bytes for partial read()s ++ self._decoded_buffer = BytesQueueBuffer() ++ + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) +@@ -532,6 +593,45 @@ class HTTPResponse(io.IOBase): + # StringIO doesn't like amt=None + return self._fp.read(amt) if amt is not None else self._fp.read() + ++ def _raw_read(self, amt=None): ++ """ ++ Reads `amt` of bytes from the socket. ++ """ ++ if self._fp is None: ++ return None # type: ignore[return-value] ++ ++ fp_closed = getattr(self._fp, "closed", False) ++ ++ with self._error_catcher(): ++ data = self._fp_read(amt) if not fp_closed else b"" ++ if amt is not None and amt != 0 and not data: ++ # Platform-specific: Buggy versions of Python. ++ # Close the connection when no data is returned ++ # ++ # This is redundant to what httplib/http.client _should_ ++ # already do. However, versions of python released before ++ # December 15, 2012 (http://bugs.python.org/issue16298) do ++ # not properly close the connection in all cases. There is ++ # no harm in redundantly calling close. ++ self._fp.close() ++ if ( ++ self.enforce_content_length ++ and self.length_remaining is not None ++ and self.length_remaining != 0 ++ ): ++ # This is an edge case that httplib failed to cover due ++ # to concerns of backward compatibility. We're ++ # addressing it here to make sure IncompleteRead is ++ # raised during streaming, so all calls with incorrect ++ # Content-Length are caught. ++ raise IncompleteRead(self._fp_bytes_read, self.length_remaining) ++ ++ if data: ++ self._fp_bytes_read += len(data) ++ if self.length_remaining is not None: ++ self.length_remaining -= len(data) ++ return data ++ + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`http.client.HTTPResponse.read`, but with two additional +@@ -557,50 +657,43 @@ class HTTPResponse(io.IOBase): + if decode_content is None: + decode_content = self.decode_content + +- if self._fp is None: +- return ++ if amt is not None: ++ cache_content = False ++ ++ if len(self._decoded_buffer) >= amt: ++ return self._decoded_buffer.get(amt) ++ ++ data = self._raw_read(amt) + + flush_decoder = False +- fp_closed = getattr(self._fp, "closed", False) ++ if amt is None: ++ flush_decoder = True ++ elif amt != 0 and not data: ++ flush_decoder = True + +- with self._error_catcher(): +- data = self._fp_read(amt) if not fp_closed else b"" +- if amt is None: +- flush_decoder = True +- else: +- cache_content = False +- if ( +- amt != 0 and not data +- ): # Platform-specific: Buggy versions of Python. +- # Close the connection when no data is returned +- # +- # This is redundant to what httplib/http.client _should_ +- # already do. However, versions of python released before +- # December 15, 2012 (http://bugs.python.org/issue16298) do +- # not properly close the connection in all cases. There is +- # no harm in redundantly calling close. +- self._fp.close() +- flush_decoder = True +- if self.enforce_content_length and self.length_remaining not in ( +- 0, +- None, +- ): +- # This is an edge case that httplib failed to cover due +- # to concerns of backward compatibility. We're +- # addressing it here to make sure IncompleteRead is +- # raised during streaming, so all calls with incorrect +- # Content-Length are caught. +- raise IncompleteRead(self._fp_bytes_read, self.length_remaining) +- +- if data: +- self._fp_bytes_read += len(data) +- if self.length_remaining is not None: +- self.length_remaining -= len(data) ++ if not data and len(self._decoded_buffer) == 0: ++ return data + ++ if amt is None: + data = self._decode(data, decode_content, flush_decoder) +- + if cache_content: + self._body = data ++ else: ++ # do not waste memory on buffer when not decoding ++ if not decode_content: ++ return data ++ ++ decoded_data = self._decode(data, decode_content, flush_decoder) ++ self._decoded_buffer.put(decoded_data) ++ ++ while len(self._decoded_buffer) < amt and data: ++ # TODO make sure to initially read enough data to get past the headers ++ # For example, the GZ file header takes 10 bytes, we don't want to read ++ # it one byte at a time ++ data = self._raw_read(amt) ++ decoded_data = self._decode(data, decode_content, flush_decoder) ++ self._decoded_buffer.put(decoded_data) ++ data = self._decoded_buffer.get(amt) + + return data + +diff --git a/test/test_response.py b/test/test_response.py +index e09e3858..44a630af 100644 +--- a/test/test_response.py ++++ b/test/test_response.py +@@ -4,6 +4,7 @@ import contextlib + import re + import socket + import ssl ++import sys + import zlib + from base64 import b64decode + from io import BufferedReader, BytesIO, TextIOWrapper +@@ -25,10 +26,60 @@ from urllib3.exceptions import ( + httplib_IncompleteRead, + ) + from urllib3.packages.six.moves import http_client as httplib +-from urllib3.response import HTTPResponse, brotli ++from urllib3.response import HTTPResponse, BytesQueueBuffer, brotli + from urllib3.util.response import is_fp_closed + from urllib3.util.retry import RequestHistory, Retry + ++ ++class TestBytesQueueBuffer: ++ def test_single_chunk(self): ++ buffer = BytesQueueBuffer() ++ assert len(buffer) == 0 ++ with pytest.raises(RuntimeError, match="buffer is empty"): ++ assert buffer.get(10) ++ ++ buffer.put(b"foo") ++ with pytest.raises(ValueError, match="n should be > 0"): ++ buffer.get(-1) ++ ++ assert buffer.get(1) == b"f" ++ assert buffer.get(2) == b"oo" ++ with pytest.raises(RuntimeError, match="buffer is empty"): ++ assert buffer.get(10) ++ ++ def test_read_too_much(self): ++ buffer = BytesQueueBuffer() ++ buffer.put(b"foo") ++ assert buffer.get(100) == b"foo" ++ ++ def test_multiple_chunks(self): ++ buffer = BytesQueueBuffer() ++ buffer.put(b"foo") ++ buffer.put(b"bar") ++ buffer.put(b"baz") ++ assert len(buffer) == 9 ++ ++ assert buffer.get(1) == b"f" ++ assert len(buffer) == 8 ++ assert buffer.get(4) == b"ooba" ++ assert len(buffer) == 4 ++ assert buffer.get(4) == b"rbaz" ++ assert len(buffer) == 0 ++ ++ @pytest.mark.skipif( ++ sys.version_info < (3, 8), reason="pytest-memray requires Python 3.8+" ++ ) ++ @pytest.mark.limit_memory("12.5 MB") # assert that we're not doubling memory usage ++ def test_memory_usage(self): ++ # Allocate 10 1MiB chunks ++ buffer = BytesQueueBuffer() ++ for i in range(10): ++ # This allocates 2MiB, putting the max at around 12MiB. Not sure why. ++ buffer.put(bytes(2**20)) ++ ++ assert len(buffer.get(10 * 2**20)) == 10 * 2**20 ++ ++ + # A known random (i.e, not-too-compressible) payload generated with: + # "".join(random.choice(string.printable) for i in xrange(512)) + # .encode("zlib").encode("base64") +@@ -76,9 +127,19 @@ class TestLegacyResponse(object): + + class TestResponse(object): + def test_cache_content(self): +- r = HTTPResponse("foo") +- assert r.data == "foo" +- assert r._body == "foo" ++ r = HTTPResponse(b"foo") ++ assert r._body == b"foo" ++ assert r.data == b"foo" ++ assert r._body == b"foo" ++ ++ def test_cache_content_preload_false(self): ++ fp = BytesIO(b"foo") ++ r = HTTPResponse(fp, preload_content=False) ++ ++ assert not r._body ++ assert r.data == b"foo" ++ assert r._body == b"foo" ++ assert r.data == b"foo" + + def test_default(self): + r = HTTPResponse() +@@ -143,12 +204,7 @@ class TestResponse(object): + fp, headers={"content-encoding": "deflate"}, preload_content=False + ) + +- assert r.read(3) == b"" +- # Buffer in case we need to switch to the raw stream +- assert r._decoder._data is not None + assert r.read(1) == b"f" +- # Now that we've decoded data, we just stream through the decoder +- assert r._decoder._data is None + assert r.read(2) == b"oo" + assert r.read() == b"" + assert r.read() == b"" +@@ -163,10 +219,7 @@ class TestResponse(object): + fp, headers={"content-encoding": "deflate"}, preload_content=False + ) + +- assert r.read(1) == b"" + assert r.read(1) == b"f" +- # Once we've decoded data, we just stream to the decoder; no buffering +- assert r._decoder._data is None + assert r.read(2) == b"oo" + assert r.read() == b"" + assert r.read() == b"" +@@ -181,7 +234,6 @@ class TestResponse(object): + fp, headers={"content-encoding": "gzip"}, preload_content=False + ) + +- assert r.read(11) == b"" + assert r.read(1) == b"f" + assert r.read(2) == b"oo" + assert r.read() == b"" +@@ -295,6 +347,23 @@ class TestResponse(object): + + assert r.data == b"foo" + ++ def test_read_multi_decoding_deflate_deflate(self): ++ msg = b"foobarbaz" * 42 ++ data = zlib.compress(zlib.compress(msg)) ++ ++ fp = BytesIO(data) ++ r = HTTPResponse( ++ fp, headers={"content-encoding": "deflate, deflate"}, preload_content=False ++ ) ++ ++ assert r.read(3) == b"foo" ++ assert r.read(3) == b"bar" ++ assert r.read(3) == b"baz" ++ assert r.read(9) == b"foobarbaz" ++ assert r.read(9 * 3) == b"foobarbaz" * 3 ++ assert r.read(9 * 37) == b"foobarbaz" * 37 ++ assert r.read() == b"" ++ + def test_body_blob(self): + resp = HTTPResponse(b"foo") + assert resp.data == b"foo" +@@ -491,8 +560,8 @@ class TestResponse(object): + ) + stream = resp.stream(2) + +- assert next(stream) == b"f" +- assert next(stream) == b"oo" ++ assert next(stream) == b"fo" ++ assert next(stream) == b"o" + with pytest.raises(StopIteration): + next(stream) + +@@ -521,6 +590,7 @@ class TestResponse(object): + # Ensure that ``tell()`` returns the correct number of bytes when + # part-way through streaming compressed content. + NUMBER_OF_READS = 10 ++ PART_SIZE = 64 + + class MockCompressedDataReading(BytesIO): + """ +@@ -549,7 +619,7 @@ class TestResponse(object): + resp = HTTPResponse( + fp, headers={"content-encoding": "deflate"}, preload_content=False + ) +- stream = resp.stream() ++ stream = resp.stream(PART_SIZE) + + parts_positions = [(part, resp.tell()) for part in stream] + end_of_stream = resp.tell() +@@ -564,12 +634,28 @@ class TestResponse(object): + assert uncompressed_data == payload + + # Check that the positions in the stream are correct +- expected = [(i + 1) * payload_part_size for i in range(NUMBER_OF_READS)] +- assert expected == list(positions) ++ # It is difficult to determine programatically what the positions ++ # returned by `tell` will be because the `HTTPResponse.read` method may ++ # call socket `read` a couple of times if it doesn't have enough data ++ # in the buffer or not call socket `read` at all if it has enough. All ++ # this depends on the message, how it was compressed, what is ++ # `PART_SIZE` and `payload_part_size`. ++ # So for simplicity the expected values are hardcoded. ++ expected = (92, 184, 230, 276, 322, 368, 414, 460) ++ assert expected == positions + + # Check that the end of the stream is in the correct place + assert len(ZLIB_PAYLOAD) == end_of_stream + ++ # Check that all parts have expected length ++ expected_last_part_size = len(uncompressed_data) % PART_SIZE ++ whole_parts = len(uncompressed_data) // PART_SIZE ++ if expected_last_part_size == 0: ++ expected_lengths = [PART_SIZE] * whole_parts ++ else: ++ expected_lengths = [PART_SIZE] * whole_parts + [expected_last_part_size] ++ assert expected_lengths == [len(part) for part in parts] ++ + def test_deflate_streaming(self): + data = zlib.compress(b"foo") + +@@ -579,8 +665,8 @@ class TestResponse(object): + ) + stream = resp.stream(2) + +- assert next(stream) == b"f" +- assert next(stream) == b"oo" ++ assert next(stream) == b"fo" ++ assert next(stream) == b"o" + with pytest.raises(StopIteration): + next(stream) + +@@ -595,8 +681,8 @@ class TestResponse(object): + ) + stream = resp.stream(2) + +- assert next(stream) == b"f" +- assert next(stream) == b"oo" ++ assert next(stream) == b"fo" ++ assert next(stream) == b"o" + with pytest.raises(StopIteration): + next(stream) + +@@ -608,6 +694,38 @@ class TestResponse(object): + with pytest.raises(StopIteration): + next(stream) + ++ @pytest.mark.parametrize( ++ "preload_content, amt", ++ [(True, None), (False, None), (False, 10 * 2**20)], ++ ) ++ @pytest.mark.limit_memory("25 MB") ++ def test_buffer_memory_usage_decode_one_chunk( ++ self, preload_content, amt ++ ): ++ content_length = 10 * 2**20 # 10 MiB ++ fp = BytesIO(zlib.compress(bytes(content_length))) ++ resp = HTTPResponse( ++ fp, ++ preload_content=preload_content, ++ headers={"content-encoding": "deflate"}, ++ ) ++ data = resp.data if preload_content else resp.read(amt) ++ assert len(data) == content_length ++ ++ @pytest.mark.parametrize( ++ "preload_content, amt", ++ [(True, None), (False, None), (False, 10 * 2**20)], ++ ) ++ @pytest.mark.limit_memory("10.5 MB") ++ def test_buffer_memory_usage_no_decoding( ++ self, preload_content, amt ++ ): ++ content_length = 10 * 2**20 # 10 MiB ++ fp = BytesIO(bytes(content_length)) ++ resp = HTTPResponse(fp, preload_content=preload_content, decode_content=False) ++ data = resp.data if preload_content else resp.read(amt) ++ assert len(data) == content_length ++ + def test_length_no_header(self): + fp = BytesIO(b"12345") + resp = HTTPResponse(fp, preload_content=False) +diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py +index 254899ef..3309b8aa 100644 +--- a/test/with_dummyserver/test_socketlevel.py ++++ b/test/with_dummyserver/test_socketlevel.py +@@ -1866,15 +1866,8 @@ class TestBadContentLength(SocketDummyServerTestCase): + "GET", url="/", preload_content=False, enforce_content_length=True + ) + data = get_response.stream(100) +- # Read "good" data before we try to read again. +- # This won't trigger till generator is exhausted. +- next(data) +- try: ++ with pytest.raises(ProtocolError, match="12 bytes read, 10 more expected"): + next(data) +- assert False +- except ProtocolError as e: +- assert "12 bytes read, 10 more expected" in str(e) +- + done_event.set() + + def test_enforce_content_length_no_body(self): +-- +2.52.0 + + +From 10fb2b10818ee0caf0ee6ac7c02268ae69337a36 Mon Sep 17 00:00:00 2001 +From: Seth Michael Larson +Date: Wed, 3 May 2023 15:46:21 -0500 +Subject: [PATCH 3/5] Continue reading the response stream if there is buffered + decompressed data + +(cherry picked from commit 4714836a667eb4837d005eb89d34fae60b9dc6cc) +--- + changelog/3009.bugfix | 3 ++ + src/urllib3/response.py | 2 +- + test/with_dummyserver/test_socketlevel.py | 49 +++++++++++++++++++++++ + 3 files changed, 53 insertions(+), 1 deletion(-) + create mode 100644 changelog/3009.bugfix + +diff --git a/changelog/3009.bugfix b/changelog/3009.bugfix +new file mode 100644 +index 00000000..61f54a49 +--- /dev/null ++++ b/changelog/3009.bugfix +@@ -0,0 +1,3 @@ ++Fixed ``HTTPResponse.stream()`` to continue yielding bytes if buffered decompressed data ++was still available to be read even if the underlying socket is closed. This prevents ++a compressed response from being truncated. +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index 9793de48..4b763cf8 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -717,7 +717,7 @@ class HTTPResponse(io.IOBase): + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: +- while not is_fp_closed(self._fp): ++ while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: + data = self.read(amt=amt, decode_content=decode_content) + + if data: +diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py +index 3309b8aa..ff4bc9b9 100644 +--- a/test/with_dummyserver/test_socketlevel.py ++++ b/test/with_dummyserver/test_socketlevel.py +@@ -42,6 +42,7 @@ import socket + import ssl + import sys + import tempfile ++import zlib + from collections import OrderedDict + from test import ( + LONG_TIMEOUT, +@@ -1837,6 +1838,54 @@ class TestStream(SocketDummyServerTestCase): + + done_event.set() + ++ def test_large_compressed_stream(self): ++ done_event = Event() ++ expected_total_length = 296085 ++ ++ def socket_handler(listener: socket.socket): ++ compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) ++ data = compress.compress(b"x" * expected_total_length) ++ data += compress.flush() ++ ++ sock = listener.accept()[0] ++ ++ buf = b"" ++ while not buf.endswith(b"\r\n\r\n"): ++ buf += sock.recv(65536) ++ ++ sock.sendall( ++ b"HTTP/1.1 200 OK\r\n" ++ b"Content-Length: %d\r\n" ++ b"Content-Encoding: gzip\r\n" ++ b"\r\n" % (len(data),) + data ++ ) ++ ++ done_event.wait(5) ++ sock.close() ++ ++ self._start_server(socket_handler) ++ ++ with HTTPConnectionPool(self.host, self.port, retries=False) as pool: ++ r = pool.request("GET", "/", timeout=LONG_TIMEOUT, preload_content=False) ++ ++ # Chunks must all be equal or less than 10240 ++ # and only the last chunk is allowed to be smaller ++ # than 10240. ++ total_length = 0 ++ chunks_smaller_than_10240 = 0 ++ for chunk in r.stream(10240, decode_content=True): ++ assert 0 < len(chunk) <= 10240 ++ if len(chunk) < 10240: ++ chunks_smaller_than_10240 += 1 ++ else: ++ assert chunks_smaller_than_10240 == 0 ++ total_length += len(chunk) ++ ++ assert chunks_smaller_than_10240 == 1 ++ assert expected_total_length == total_length ++ ++ done_event.set() ++ + + class TestBadContentLength(SocketDummyServerTestCase): + def test_enforce_content_length_get(self): +-- +2.52.0 + + +From e82a62787e04da58d08254a54a4da8b65d92e8b8 Mon Sep 17 00:00:00 2001 +From: Quentin Pradet +Date: Sun, 30 Apr 2023 00:29:17 +0400 +Subject: [PATCH 4/5] Fix HTTPResponse.read(0) when underlying buffer is empty + (#2998) + +(cherry picked from commit 02ae65a45654bb3ced12b9ad22278c11e214aaf8) +--- + changelog/2998.bugfix.rst | 1 + + src/urllib3/response.py | 2 ++ + test/test_response.py | 3 +++ + 3 files changed, 6 insertions(+) + create mode 100644 changelog/2998.bugfix.rst + +diff --git a/changelog/2998.bugfix.rst b/changelog/2998.bugfix.rst +new file mode 100644 +index 00000000..584f309a +--- /dev/null ++++ b/changelog/2998.bugfix.rst +@@ -0,0 +1 @@ ++Fixed ``HTTPResponse.read(0)`` call when underlying buffer is empty. +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index 4b763cf8..f6cf33a8 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -190,6 +190,8 @@ class BytesQueueBuffer: + self._size += len(data) + + def get(self, n): ++ if n == 0: ++ return b"" + if not self.buffer: + raise RuntimeError("buffer is empty") + elif n < 0: +diff --git a/test/test_response.py b/test/test_response.py +index 44a630af..e85984f1 100644 +--- a/test/test_response.py ++++ b/test/test_response.py +@@ -38,6 +38,8 @@ class TestBytesQueueBuffer: + with pytest.raises(RuntimeError, match="buffer is empty"): + assert buffer.get(10) + ++ assert buffer.get(0) == b"" ++ + buffer.put(b"foo") + with pytest.raises(ValueError, match="n should be > 0"): + buffer.get(-1) +@@ -175,6 +177,7 @@ class TestResponse(object): + fp = BytesIO(b"foo") + r = HTTPResponse(fp, preload_content=False) + ++ assert r.read(0) == b"" + assert r.read(1) == b"f" + assert r.read(2) == b"oo" + assert r.read() == b"" +-- +2.52.0 + + +From c59306e9d91508332ecb90b6e9485ee100bcb81a Mon Sep 17 00:00:00 2001 +From: Illia Volochii +Date: Fri, 5 Dec 2025 16:40:41 +0200 +Subject: [PATCH 5/5] Security fix for CVE-2025-66471 + +(cherry picked from commit c19571de34c47de3a766541b041637ba5f716ed7) +--- + CHANGES.rst | 8 ++ + docs/advanced-usage.rst | 3 +- + docs/user-guide.rst | 4 +- + src/urllib3/response.py | 208 ++++++++++++++++++++++++++++++++++------ + test/test_response.py | 176 ++++++++++++++++++++++++++++++++++ + 5 files changed, 365 insertions(+), 34 deletions(-) + +diff --git a/CHANGES.rst b/CHANGES.rst +index c45b3d16..943c7679 100644 +--- a/CHANGES.rst ++++ b/CHANGES.rst +@@ -1,6 +1,14 @@ + Changes + ======= + ++Backports ++--------- ++ ++- Fixed a security issue where streaming API could improperly handle highly ++ compressed HTTP content ("decompression bombs") leading to excessive resource ++ consumption even when a small amount of data was requested. Reading small ++ chunks of compressed data is safer and much more efficient now. ++ + 1.26.19 (2024-06-17) + -------------------- + +diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst +index d38e7190..d2b4c400 100644 +--- a/docs/advanced-usage.rst ++++ b/docs/advanced-usage.rst +@@ -57,7 +57,8 @@ When using ``preload_content=True`` (the default setting) the + response body will be read immediately into memory and the HTTP connection + will be released back into the pool without manual intervention. + +-However, when dealing with large responses it's often better to stream the response ++However, when dealing with responses of large or unknown length, ++it's often better to stream the response + content using ``preload_content=False``. Setting ``preload_content`` to ``False`` means + that urllib3 will only read from the socket when data is requested. + +diff --git a/docs/user-guide.rst b/docs/user-guide.rst +index abd53233..bb16b9a8 100644 +--- a/docs/user-guide.rst ++++ b/docs/user-guide.rst +@@ -99,8 +99,8 @@ to a byte string representing the response content: + >>> r.data + b'\xaa\xa5H?\x95\xe9\x9b\x11' + +-.. note:: For larger responses, it's sometimes better to :ref:`stream ` +- the response. ++.. note:: For responses of large or unknown length, it's sometimes better to ++ :ref:`stream ` the response. + + Using io Wrappers with Response Content + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index f6cf33a8..d0665533 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -24,6 +24,7 @@ from .connection import BaseSSLError, HTTPException + from .exceptions import ( + BodyNotHttplibCompatible, + DecodeError, ++ DependencyWarning, + HTTPError, + IncompleteRead, + InvalidChunkLength, +@@ -42,33 +43,60 @@ log = logging.getLogger(__name__) + class DeflateDecoder(object): + def __init__(self): + self._first_try = True +- self._data = b"" ++ self._first_try_data = b"" ++ self._unfed_data = b"" + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + +- def decompress(self, data): +- if not data: ++ def decompress(self, data, max_length=-1): ++ data = self._unfed_data + data ++ self._unfed_data = b"" ++ if not data and not self._obj.unconsumed_tail: + return data ++ original_max_length = max_length ++ if original_max_length < 0: ++ max_length = 0 ++ elif original_max_length == 0: ++ # We should not pass 0 to the zlib decompressor because 0 is ++ # the default value that will make zlib decompress without a ++ # length limit. ++ # Data should be stored for subsequent calls. ++ self._unfed_data = data ++ return b"" + ++ # Subsequent calls always reuse `self._obj`. zlib requires ++ # passing the unconsumed tail if decompression is to continue. + if not self._first_try: +- return self._obj.decompress(data) ++ return self._obj.decompress( ++ self._obj.unconsumed_tail + data, max_length=max_length ++ ) + +- self._data += data ++ # First call tries with RFC 1950 ZLIB format. ++ self._first_try_data += data + try: +- decompressed = self._obj.decompress(data) ++ decompressed = self._obj.decompress(data, max_length=max_length) + if decompressed: + self._first_try = False +- self._data = None ++ self._first_try_data = b"" + return decompressed ++ # On failure, it falls back to RFC 1951 DEFLATE format. + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: +- return self.decompress(self._data) ++ return self.decompress( ++ self._first_try_data, max_length=original_max_length ++ ) + finally: +- self._data = None ++ self._first_try_data = b"" ++ ++ @property ++ def has_unconsumed_tail(self): ++ return bool(self._unfed_data) or ( ++ bool(self._obj.unconsumed_tail) and not self._first_try ++ ) + + + class GzipDecoderState(object): +@@ -82,30 +110,64 @@ class GzipDecoder(object): + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + self._state = GzipDecoderState.FIRST_MEMBER ++ self._unconsumed_tail = b"" + + def __getattr__(self, name): + return getattr(self._obj, name) + +- def decompress(self, data): ++ def decompress(self, data, max_length=-1): + ret = bytearray() +- if self._state == GzipDecoderState.SWALLOW_DATA or not data: ++ if self._state == GzipDecoderState.SWALLOW_DATA: + return bytes(ret) ++ ++ if max_length == 0: ++ # We should not pass 0 to the zlib decompressor because 0 is ++ # the default value that will make zlib decompress without a ++ # length limit. ++ # Data should be stored for subsequent calls. ++ self._unconsumed_tail += data ++ return b"" ++ ++ # zlib requires passing the unconsumed tail to the subsequent ++ # call if decompression is to continue. ++ data = self._unconsumed_tail + data ++ if not data and self._obj.eof: ++ return bytes(ret) ++ + while True: + try: +- ret += self._obj.decompress(data) ++ ret += self._obj.decompress( ++ data, max_length=max(max_length - len(ret), 0) ++ ) + except zlib.error: + previous_state = self._state + # Ignore data after the first error + self._state = GzipDecoderState.SWALLOW_DATA ++ self._unconsumed_tail = b"" + if previous_state == GzipDecoderState.OTHER_MEMBERS: + # Allow trailing garbage acceptable in other gzip clients + return bytes(ret) + raise +- data = self._obj.unused_data ++ ++ self._unconsumed_tail = data = ( ++ self._obj.unconsumed_tail or self._obj.unused_data ++ ) ++ if max_length > 0 and len(ret) >= max_length: ++ break ++ + if not data: + return bytes(ret) +- self._state = GzipDecoderState.OTHER_MEMBERS +- self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) ++ # When the end of a gzip member is reached, a new decompressor ++ # must be created for unused (possibly future) data. ++ if self._obj.eof: ++ self._state = GzipDecoderState.OTHER_MEMBERS ++ self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) ++ ++ return bytes(ret) ++ ++ @property ++ def has_unconsumed_tail(self): ++ return bool(self._unconsumed_tail) + + + if brotli is not None: +@@ -117,9 +179,35 @@ if brotli is not None: + def __init__(self): + self._obj = brotli.Decompressor() + if hasattr(self._obj, "decompress"): +- self.decompress = self._obj.decompress ++ self._decompress = self._obj.decompress + else: +- self.decompress = self._obj.process ++ self._decompress = self._obj.process ++ ++ # Requires Brotli >= 1.2.0 for `output_buffer_limit`. ++ def _decompress(self, data, output_buffer_limit=-1): ++ raise NotImplementedError() ++ ++ def decompress(self, data, max_length=-1): ++ try: ++ if max_length > 0: ++ return self._decompress(data, output_buffer_limit=max_length) ++ else: ++ return self._decompress(data) ++ except TypeError: ++ # Fallback for Brotli/brotlicffi/brotlipy versions without ++ # the `output_buffer_limit` parameter. ++ warnings.warn( ++ "Brotli >= 1.2.0 is required to prevent decompression bombs.", ++ DependencyWarning, ++ ) ++ return self._decompress(data) ++ ++ @property ++ def has_unconsumed_tail(self): ++ try: ++ return not self._obj.can_accept_more_data() ++ except AttributeError: ++ return False + + def flush(self): + if hasattr(self._obj, "flush"): +@@ -142,10 +230,35 @@ class MultiDecoder(object): + def flush(self): + return self._decoders[0].flush() + +- def decompress(self, data): +- for d in reversed(self._decoders): +- data = d.decompress(data) +- return data ++ def decompress(self, data, max_length=-1): ++ if max_length <= 0: ++ for d in reversed(self._decoders): ++ data = d.decompress(data) ++ return data ++ ++ ret = bytearray() ++ # Every while loop iteration goes through all decoders once. ++ # It exits when enough data is read or no more data can be read. ++ # It is possible that the while loop iteration does not produce ++ # any data because we retrieve up to `max_length` from every ++ # decoder, and the amount of bytes may be insufficient for the ++ # next decoder to produce enough/any output. ++ while True: ++ any_data = False ++ for d in reversed(self._decoders): ++ data = d.decompress(data, max_length=max_length - len(ret)) ++ if data: ++ any_data = True ++ # We should not break when no data is returned because ++ # next decoders may produce data even with empty input. ++ ret += data ++ if not any_data or len(ret) >= max_length: ++ return bytes(ret) ++ data = b"" ++ ++ @property ++ def has_unconsumed_tail(self): ++ return any(d.has_unconsumed_tail for d in self._decoders) + + + def _get_decoder(mode): +@@ -173,9 +286,6 @@ class BytesQueueBuffer: + + * self.buffer, which contains the full data + * the largest chunk that we will copy in get() +- +- The worst case scenario is a single chunk, in which case we'll make a full copy of +- the data inside get(). + """ + + def __init__(self): +@@ -197,6 +307,10 @@ class BytesQueueBuffer: + elif n < 0: + raise ValueError("n should be > 0") + ++ if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes): ++ self._size -= n ++ return self.buffer.popleft() ++ + fetched = 0 + ret = io.BytesIO() + while fetched < n: +@@ -458,16 +572,19 @@ class HTTPResponse(io.IOBase): + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.error,) + +- def _decode(self, data, decode_content, flush_decoder): ++ def _decode(self, data, decode_content, flush_decoder, max_length=None): + """ + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: + return data + ++ if max_length is None or flush_decoder: ++ max_length = -1 ++ + try: + if self._decoder: +- data = self._decoder.decompress(data) ++ data = self._decoder.decompress(data, max_length=max_length) + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( +@@ -662,6 +779,14 @@ class HTTPResponse(io.IOBase): + if amt is not None: + cache_content = False + ++ if self._decoder and self._decoder.has_unconsumed_tail: ++ decoded_data = self._decode( ++ b"", ++ decode_content, ++ flush_decoder=False, ++ max_length=amt - len(self._decoded_buffer), ++ ) ++ self._decoded_buffer.put(decoded_data) + if len(self._decoded_buffer) >= amt: + return self._decoded_buffer.get(amt) + +@@ -673,7 +798,11 @@ class HTTPResponse(io.IOBase): + elif amt != 0 and not data: + flush_decoder = True + +- if not data and len(self._decoded_buffer) == 0: ++ if ( ++ not data ++ and len(self._decoded_buffer) == 0 ++ and not (self._decoder and self._decoder.has_unconsumed_tail) ++ ): + return data + + if amt is None: +@@ -685,7 +814,12 @@ class HTTPResponse(io.IOBase): + if not decode_content: + return data + +- decoded_data = self._decode(data, decode_content, flush_decoder) ++ decoded_data = self._decode( ++ data, ++ decode_content, ++ flush_decoder, ++ max_length=amt - len(self._decoded_buffer), ++ ) + self._decoded_buffer.put(decoded_data) + + while len(self._decoded_buffer) < amt and data: +@@ -693,7 +827,12 @@ class HTTPResponse(io.IOBase): + # For example, the GZ file header takes 10 bytes, we don't want to read + # it one byte at a time + data = self._raw_read(amt) +- decoded_data = self._decode(data, decode_content, flush_decoder) ++ decoded_data = self._decode( ++ data, ++ decode_content, ++ flush_decoder, ++ max_length=amt - len(self._decoded_buffer), ++ ) + self._decoded_buffer.put(decoded_data) + data = self._decoded_buffer.get(amt) + +@@ -719,7 +858,11 @@ class HTTPResponse(io.IOBase): + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: +- while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: ++ while ( ++ not is_fp_closed(self._fp) ++ or len(self._decoded_buffer) > 0 ++ or (self._decoder and self._decoder.has_unconsumed_tail) ++ ): + data = self.read(amt=amt, decode_content=decode_content) + + if data: +@@ -925,7 +1068,10 @@ class HTTPResponse(io.IOBase): + break + chunk = self._handle_chunk(amt) + decoded = self._decode( +- chunk, decode_content=decode_content, flush_decoder=False ++ chunk, ++ decode_content=decode_content, ++ flush_decoder=False, ++ max_length=amt, + ) + if decoded: + yield decoded +diff --git a/test/test_response.py b/test/test_response.py +index e85984f1..f949b2b1 100644 +--- a/test/test_response.py ++++ b/test/test_response.py +@@ -1,6 +1,7 @@ + # -*- coding: utf-8 -*- + + import contextlib ++import gzip + import re + import socket + import ssl +@@ -31,6 +32,22 @@ from urllib3.util.response import is_fp_closed + from urllib3.util.retry import RequestHistory, Retry + + ++def deflate2_compress(data): ++ compressor = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) ++ return compressor.compress(data) + compressor.flush() ++ ++ ++if brotli: ++ try: ++ brotli.Decompressor().process(b"", output_buffer_limit=1024) ++ _brotli_gte_1_2_0_available = True ++ except (AttributeError, TypeError): ++ _brotli_gte_1_2_0_available = False ++else: ++ _brotli_gte_1_2_0_available = False ++_zstd_available = False ++ ++ + class TestBytesQueueBuffer: + def test_single_chunk(self): + buffer = BytesQueueBuffer() +@@ -318,6 +335,165 @@ class TestResponse(object): + with pytest.raises(DecodeError): + HTTPResponse(fp, headers={"content-encoding": "br"}) + ++ _test_compressor_params = [ ++ ("deflate1", ("deflate", zlib.compress)), ++ ("deflate2", ("deflate", deflate2_compress)), ++ ("gzip", ("gzip", gzip.compress)), ++ ] ++ if _brotli_gte_1_2_0_available: ++ _test_compressor_params.append(("brotli", ("br", brotli.compress))) ++ else: ++ _test_compressor_params.append(("brotli", None)) ++ if _zstd_available: ++ _test_compressor_params.append(("zstd", ("zstd", zstd_compress))) ++ else: ++ _test_compressor_params.append(("zstd", None)) ++ ++ @pytest.mark.parametrize("read_method", ("read",)) ++ @pytest.mark.parametrize( ++ "data", ++ [d[1] for d in _test_compressor_params], ++ ids=[d[0] for d in _test_compressor_params], ++ ) ++ def test_read_with_all_data_already_in_decompressor( ++ self, ++ request, ++ read_method, ++ data, ++ ): ++ if data is None: ++ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available") ++ original_data = b"bar" * 1000 ++ name, compress_func = data ++ compressed_data = compress_func(original_data) ++ fp = mock.Mock(read=mock.Mock(return_value=b"")) ++ r = HTTPResponse(fp, headers={"content-encoding": name}, preload_content=False) ++ # Put all data in the decompressor's buffer. ++ r._init_decoder() ++ assert r._decoder is not None # for mypy ++ decoded = r._decoder.decompress(compressed_data, max_length=0) ++ if name == "br": ++ # It's known that some Brotli libraries do not respect ++ # `max_length`. ++ r._decoded_buffer.put(decoded) ++ else: ++ assert decoded == b"" ++ # Read the data via `HTTPResponse`. ++ read = getattr(r, read_method) ++ assert read(0) == b"" ++ assert read(2500) == original_data[:2500] ++ assert read(500) == original_data[2500:] ++ assert read(0) == b"" ++ assert read() == b"" ++ ++ @pytest.mark.parametrize( ++ "delta", ++ ( ++ 0, # First read from socket returns all compressed data. ++ -1, # First read from socket returns all but one byte of compressed data. ++ ), ++ ) ++ @pytest.mark.parametrize("read_method", ("read",)) ++ @pytest.mark.parametrize( ++ "data", ++ [d[1] for d in _test_compressor_params], ++ ids=[d[0] for d in _test_compressor_params], ++ ) ++ def test_decode_with_max_length_close_to_compressed_data_size( ++ self, ++ request, ++ delta, ++ read_method, ++ data, ++ ): ++ """ ++ Test decoding when the first read from the socket returns all or ++ almost all the compressed data, but then it has to be ++ decompressed in a couple of read calls. ++ """ ++ if data is None: ++ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available") ++ ++ original_data = b"foo" * 1000 ++ name, compress_func = data ++ compressed_data = compress_func(original_data) ++ fp = BytesIO(compressed_data) ++ r = HTTPResponse(fp, headers={"content-encoding": name}, preload_content=False) ++ initial_limit = len(compressed_data) + delta ++ read = getattr(r, read_method) ++ initial_chunk = read(amt=initial_limit, decode_content=True) ++ assert len(initial_chunk) == initial_limit ++ assert ( ++ len(read(amt=len(original_data), decode_content=True)) ++ == len(original_data) - initial_limit ++ ) ++ ++ # Prepare 50 MB of compressed data outside of the test measuring ++ # memory usage. ++ _test_memory_usage_decode_with_max_length_params = [ ++ ( ++ params[0], ++ (params[1][0], params[1][1](b"A" * (50 * 2**20))) if params[1] else None, ++ ) ++ for params in _test_compressor_params ++ ] ++ ++ @pytest.mark.parametrize( ++ "data", ++ [d[1] for d in _test_memory_usage_decode_with_max_length_params], ++ ids=[d[0] for d in _test_memory_usage_decode_with_max_length_params], ++ ) ++ @pytest.mark.parametrize("read_method", ("read", "read_chunked", "stream")) ++ # Decoders consume different amounts of memory during decompression. ++ # We set the 10 MB limit to ensure that the whole decompressed data ++ # is not stored unnecessarily. ++ # ++ # FYI, the following consumption was observed for the test with ++ # `read` on CPython 3.14.0: ++ # - deflate: 2.3 MiB ++ # - deflate2: 2.1 MiB ++ # - gzip: 2.1 MiB ++ # - brotli: ++ # - brotli v1.2.0: 9 MiB ++ # - brotlicffi v1.2.0.0: 6 MiB ++ # - brotlipy v0.7.0: 105.8 MiB ++ # - zstd: 4.5 MiB ++ @pytest.mark.limit_memory("10 MB", current_thread_only=True) ++ def test_memory_usage_decode_with_max_length( ++ self, ++ request, ++ read_method, ++ data, ++ ): ++ if data is None: ++ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available") ++ ++ name, compressed_data = data ++ limit = 1024 * 1024 # 1 MiB ++ if read_method in ("read_chunked", "stream"): ++ httplib_r = httplib.HTTPResponse(MockSock) # type: ignore[arg-type] ++ httplib_r.fp = MockChunkedEncodingResponse([compressed_data]) # type: ignore[assignment] ++ r = HTTPResponse( ++ httplib_r, ++ preload_content=False, ++ headers={"transfer-encoding": "chunked", "content-encoding": name}, ++ ) ++ next(getattr(r, read_method)(amt=limit, decode_content=True)) ++ else: ++ fp = BytesIO(compressed_data) ++ r = HTTPResponse( ++ fp, headers={"content-encoding": name}, preload_content=False ++ ) ++ getattr(r, read_method)(amt=limit, decode_content=True) ++ ++ # Check that the internal decoded buffer is empty unless brotli ++ # is used. ++ # Google's brotli library does not fully respect the output ++ # buffer limit: https://github.com/google/brotli/issues/1396 ++ # And unmaintained brotlipy cannot limit the output buffer size. ++ if name != "br" or brotli.__name__ == "brotlicffi": ++ assert len(r._decoded_buffer) == 0 ++ + def test_multi_decoding_deflate_deflate(self): + data = zlib.compress(zlib.compress(b"foo")) + +-- +2.52.0 + diff --git a/SOURCES/CVE-2026-21441.patch b/SOURCES/CVE-2026-21441.patch new file mode 100644 index 0000000..016c60b --- /dev/null +++ b/SOURCES/CVE-2026-21441.patch @@ -0,0 +1,219 @@ +From 8c0a6cd7918719c5a636640d1c93e1609c6c6ce0 Mon Sep 17 00:00:00 2001 +From: Ousret +Date: Thu, 17 Nov 2022 01:40:19 +0100 +Subject: [PATCH 1/2] Prevent issue in HTTPResponse().read() when + decoded_content is True and then False Provided it has initialized eligible + decoder(decompressor) and did decode once + +(cherry picked from commit cefd1dbba6a20ea4f017e6e472f9ada3a8a743e0) +--- + changelog/2800.bugfix.rst | 1 + + src/urllib3/response.py | 12 ++++++++++++ + test/test_response.py | 35 +++++++++++++++++++++++++++++++++++ + 3 files changed, 48 insertions(+) + create mode 100644 changelog/2800.bugfix.rst + +diff --git a/changelog/2800.bugfix.rst b/changelog/2800.bugfix.rst +new file mode 100644 +index 00000000..9dcf1eec +--- /dev/null ++++ b/changelog/2800.bugfix.rst +@@ -0,0 +1 @@ ++Prevented issue in HTTPResponse().read() when decoded_content is True and then False. +\ No newline at end of file +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index f945c41c..81ef4455 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -412,6 +412,7 @@ class HTTPResponse(io.IOBase): + self.reason = reason + self.strict = strict + self.decode_content = decode_content ++ self._has_decoded_content = False + self.retries = retries + self.enforce_content_length = enforce_content_length + self.auto_close = auto_close +@@ -587,6 +588,11 @@ class HTTPResponse(io.IOBase): + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: ++ if self._has_decoded_content: ++ raise RuntimeError( ++ "Calling read(decode_content=False) is not supported after " ++ "read(decode_content=True) was called." ++ ) + return data + + if max_length is None or flush_decoder: +@@ -595,6 +601,7 @@ class HTTPResponse(io.IOBase): + try: + if self._decoder: + data = self._decoder.decompress(data, max_length=max_length) ++ self._has_decoded_content = True + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( +@@ -822,6 +829,11 @@ class HTTPResponse(io.IOBase): + else: + # do not waste memory on buffer when not decoding + if not decode_content: ++ if self._has_decoded_content: ++ raise RuntimeError( ++ "Calling read(decode_content=False) is not supported after " ++ "read(decode_content=True) was called." ++ ) + return data + + decoded_data = self._decode( +diff --git a/test/test_response.py b/test/test_response.py +index 33f570c4..2614960f 100644 +--- a/test/test_response.py ++++ b/test/test_response.py +@@ -710,6 +710,41 @@ class TestResponse(object): + next(reader) + assert re.match("I/O operation on closed file.?", str(ctx.value)) + ++ def test_read_with_illegal_mix_decode_toggle(self): ++ compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) ++ data = compress.compress(b"foo") ++ data += compress.flush() ++ ++ fp = BytesIO(data) ++ ++ resp = HTTPResponse( ++ fp, headers={"content-encoding": "deflate"}, preload_content=False ++ ) ++ ++ assert resp.read(1) == b"f" ++ ++ with pytest.raises( ++ RuntimeError, ++ match=( ++ r"Calling read\(decode_content=False\) is not supported after " ++ r"read\(decode_content=True\) was called" ++ ), ++ ): ++ resp.read(1, decode_content=False) ++ ++ def test_read_with_mix_decode_toggle(self): ++ compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS) ++ data = compress.compress(b"foo") ++ data += compress.flush() ++ ++ fp = BytesIO(data) ++ ++ resp = HTTPResponse( ++ fp, headers={"content-encoding": "deflate"}, preload_content=False ++ ) ++ resp.read(1, decode_content=False) ++ assert resp.read(1, decode_content=True) == b"o" ++ + def test_streaming(self): + fp = BytesIO(b"foo") + resp = HTTPResponse(fp, preload_content=False) +-- +2.52.0 + + +From 868d351ffcd32b0aa30fb94db61b2dd51c6c231b Mon Sep 17 00:00:00 2001 +From: Illia Volochii +Date: Wed, 7 Jan 2026 18:07:30 +0200 +Subject: [PATCH 2/2] Security fix for CVE-2026-21441 + +* Stop decoding response content during redirects needlessly +* Rename the new query parameter +* Add a changelog entry + +(cherry picked from commit 8864ac407bba8607950025e0979c4c69bc7abc7b) +--- + CHANGES.rst | 3 +++ + dummyserver/handlers.py | 8 +++++++- + src/urllib3/response.py | 6 +++++- + test/with_dummyserver/test_connectionpool.py | 19 +++++++++++++++++++ + 4 files changed, 34 insertions(+), 2 deletions(-) + +diff --git a/CHANGES.rst b/CHANGES.rst +index 943c7679..f712291e 100644 +--- a/CHANGES.rst ++++ b/CHANGES.rst +@@ -8,6 +8,9 @@ Backports + compressed HTTP content ("decompression bombs") leading to excessive resource + consumption even when a small amount of data was requested. Reading small + chunks of compressed data is safer and much more efficient now. ++- Fixed a high-severity security issue where decompression-bomb safeguards of ++ the streaming API were bypassed when HTTP redirects were followed. ++ (`GHSA-38jv-5279-wg99 `__) + + 1.26.19 (2024-06-17) + -------------------- +diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py +index acd181d2..dea01682 100644 +--- a/dummyserver/handlers.py ++++ b/dummyserver/handlers.py +@@ -188,9 +188,15 @@ class TestingApp(RequestHandler): + status = "%s Redirect" % status.decode("latin-1") + elif isinstance(status, bytes): + status = status.decode("latin-1") ++ compressed = request.params.get("compressed") == b"true" + + headers = [("Location", target)] +- return Response(status=status, headers=headers) ++ if compressed: ++ headers.append(("Content-Encoding", "gzip")) ++ data = gzip.compress(b"foo") ++ else: ++ data = b"" ++ return Response(data, status=status, headers=headers) + + def not_found(self, request): + return Response("Not found", status="404 Not Found") +diff --git a/src/urllib3/response.py b/src/urllib3/response.py +index 81ef4455..1357d65c 100644 +--- a/src/urllib3/response.py ++++ b/src/urllib3/response.py +@@ -480,7 +480,11 @@ class HTTPResponse(io.IOBase): + Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. + """ + try: +- self.read() ++ self.read( ++ # Do not spend resources decoding the content unless ++ # decoding has already been initiated. ++ decode_content=self._has_decoded_content, ++ ) + except (HTTPError, SocketError, BaseSSLError, HTTPException): + pass + +diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py +index cde027b9..6e74883a 100644 +--- a/test/with_dummyserver/test_connectionpool.py ++++ b/test/with_dummyserver/test_connectionpool.py +@@ -464,6 +464,25 @@ class TestConnectionPool(HTTPDummyServerTestCase): + assert r.status == 200 + assert r.data == b"Dummy server!" + ++ @mock.patch("urllib3.response.GzipDecoder.decompress") ++ def test_no_decoding_with_redirect_when_preload_disabled( ++ self, gzip_decompress ++ ): ++ """ ++ Test that urllib3 does not attempt to decode a gzipped redirect ++ response when `preload_content` is set to `False`. ++ """ ++ with HTTPConnectionPool(self.host, self.port) as pool: ++ # Three requests are expected: two redirects and one final / 200 OK. ++ response = pool.request( ++ "GET", ++ "/redirect", ++ fields={"target": "/redirect?compressed=true", "compressed": "true"}, ++ preload_content=False, ++ ) ++ assert response.status == 200 ++ gzip_decompress.assert_not_called() ++ + def test_303_redirect_makes_request_lose_body(self): + with HTTPConnectionPool(self.host, self.port) as pool: + response = pool.request( +-- +2.52.0 + diff --git a/SPECS/python3.12-urllib3.spec b/SPECS/python3.12-urllib3.spec index ec7a517..0667f30 100644 --- a/SPECS/python3.12-urllib3.spec +++ b/SPECS/python3.12-urllib3.spec @@ -5,14 +5,17 @@ %bcond_with tests Name: python%{python3_pkgversion}-urllib3 -Version: 1.26.18 -Release: 2%{?dist} +Version: 1.26.19 +Release: 1%{?dist}.1 Summary: HTTP library with thread-safe connection pooling, file post, and more # SPDX License: MIT URL: https://github.com/urllib3/urllib3 Source: %{url}/archive/%{version}/urllib3-%{version}.tar.gz +Patch1: CVE-2025-66471.patch +Patch2: CVE-2025-66418.patch +Patch3: CVE-2026-21441.patch BuildArch: noarch @@ -72,7 +75,7 @@ many critical features that are missing from the Python standard libraries: %prep -%autosetup -n urllib3-%{version} +%autosetup -p1 -n urllib3-%{version} # Make sure that the RECENT_DATE value doesn't get too far behind what the current date is. # RECENT_DATE must not be older that 2 years from the build time, or else test_recent_date # (from test/test_connection.py) would fail. However, it shouldn't be to close to the build time either, @@ -130,6 +133,16 @@ ignore="${ignore-} --ignore=test/test_no_ssl.py" %changelog +* Tue Dec 16 2025 Miro Hrončok - 1.26.19-1.1 +- Security fix for CVE-2025-66471 +- Security fix for CVE-2025-66418 +- Security fix for CVE-2026-21441 +Resolves: RHEL-139403 + +* Wed Sep 25 2024 Lumír Balhar - 1.26.19-1 +- Rebase to 1.26.19 to fix CVE-2024-37891 +Resolves: RHEL-59998 + * Tue Jan 23 2024 Miro Hrončok - 1.26.18-2 - Rebuilt for timestamp .pyc invalidation mode