python3.11/SOURCES/00465-tarfile-cves.patch

1902 lines
86 KiB
Diff

From 140f922ec832b286f6df994040af03de68502e1c Mon Sep 17 00:00:00 2001
From: "T. Wouters" <thomas@python.org>
Date: Tue, 3 Jun 2025 16:58:39 +0200
Subject: [PATCH] gh-135034: Normalize link targets in tarfile, add
`os.path.realpath(strict='allow_missing')` (GH-135037) (GH-135068)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517.
(cherry picked from commit 3612d8f51741b11f36f8fb0494d79086bac9390a)
(cherry picked from commit c358142cab7ce621a2745262a90df967b357f61c)
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
Signed-off-by: Łukasz Langa <lukasz@langa.pl>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Doc/library/os.path.rst | 33 +-
Doc/library/tarfile.rst | 20 ++
Doc/whatsnew/3.11.rst | 34 ++
Lib/genericpath.py | 11 +-
Lib/ntpath.py | 37 ++-
Lib/posixpath.py | 15 +-
Lib/tarfile.py | 161 +++++++--
Lib/test/test_ntpath.py | 237 +++++++++++--
Lib/test/test_posixpath.py | 289 +++++++++++++---
Lib/test/test_tarfile.py | 312 ++++++++++++++++--
...-06-02-11-32-23.gh-issue-135034.RLGjbp.rst | 6 +
11 files changed, 1017 insertions(+), 138 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst
diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index 8f6dffd..f08c1fe 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -352,10 +352,26 @@ the :mod:`glob` module.)
links encountered in the path (if they are supported by the operating
system).
- If a path doesn't exist or a symlink loop is encountered, and *strict* is
- ``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is
- resolved as far as possible and any remainder is appended without checking
- whether it exists.
+ By default, the path is evaluated up to the first component that does not
+ exist, is a symlink loop, or whose evaluation raises :exc:`OSError`.
+ All such components are appended unchanged to the existing part of the path.
+
+ Some errors that are handled this way include "access denied", "not a
+ directory", or "bad argument to internal function". Thus, the
+ resulting path may be missing or inaccessible, may still contain
+ links or loops, and may traverse non-directories.
+
+ This behavior can be modified by keyword arguments:
+
+ If *strict* is ``True``, the first error encountered when evaluating the path is
+ re-raised.
+ In particular, :exc:`FileNotFoundError` is raised if *path* does not exist,
+ or another :exc:`OSError` if it is otherwise inaccessible.
+
+ If *strict* is :py:data:`os.path.ALLOW_MISSING`, errors other than
+ :exc:`FileNotFoundError` are re-raised (as with ``strict=True``).
+ Thus, the returned path will not contain any symbolic links, but the named
+ file and some of its parent directories may be missing.
.. note::
This function emulates the operating system's procedure for making a path
@@ -374,6 +390,15 @@ the :mod:`glob` module.)
.. versionchanged:: 3.10
The *strict* parameter was added.
+ .. versionchanged:: next
+ The :py:data:`~os.path.ALLOW_MISSING` value for the *strict* parameter
+ was added.
+
+.. data:: ALLOW_MISSING
+
+ Special value used for the *strict* argument in :func:`realpath`.
+
+ .. versionadded:: next
.. function:: relpath(path, start=os.curdir)
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 32d77d5..f3907cc 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -239,6 +239,15 @@ The :mod:`tarfile` module defines the following exceptions:
Raised to refuse extracting a symbolic link pointing outside the destination
directory.
+.. exception:: LinkFallbackError
+
+ Raised to refuse emulating a link (hard or symbolic) by extracting another
+ archive member, when that member would be rejected by the filter location.
+ The exception that was raised to reject the replacement member is available
+ as :attr:`!BaseException.__context__`.
+
+ .. versionadded:: next
+
The following constants are available at the module level:
@@ -1037,6 +1046,12 @@ reused in custom filters:
Implements the ``'data'`` filter.
In addition to what ``tar_filter`` does:
+ - Normalize link targets (:attr:`TarInfo.linkname`) using
+ :func:`os.path.normpath`.
+ Note that this removes internal ``..`` components, which may change the
+ meaning of the link if the path in :attr:`!TarInfo.linkname` traverses
+ symbolic links.
+
- :ref:`Refuse <tarfile-extraction-refuse>` to extract links (hard or soft)
that link to absolute paths, or ones that link outside the destination.
@@ -1065,6 +1080,10 @@ reused in custom filters:
Return the modified ``TarInfo`` member.
+ .. versionchanged:: next
+
+ Link targets are now normalized.
+
.. _tarfile-extraction-refuse:
@@ -1091,6 +1110,7 @@ Here is an incomplete list of things to consider:
* Extract to a :func:`new temporary directory <tempfile.mkdtemp>`
to prevent e.g. exploiting pre-existing links, and to make it easier to
clean up after a failed extraction.
+* Disallow symbolic links if you do not need the functionality.
* When working with untrusted data, use external (e.g. OS-level) limits on
disk, memory and CPU usage.
* Check filenames against an allow-list of characters
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index 583a408..e9e3547 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -2786,3 +2786,37 @@ email
check if the *strict* paramater is available.
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
the CVE-2023-27043 fix.)
+
+
+Notable changes in 3.11.13
+==========================
+
+os.path
+-------
+
+* The *strict* parameter to :func:`os.path.realpath` accepts a new value,
+ :data:`os.path.ALLOW_MISSING`.
+ If used, errors other than :exc:`FileNotFoundError` will be re-raised;
+ the resulting path can be missing but it will be free of symlinks.
+ (Contributed by Petr Viktorin for CVE 2025-4517.)
+
+tarfile
+-------
+
+* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to
+ avoid path traversal attacks.
+ (Contributed by Petr Viktorin in :gh:`127987` and CVE 2025-4138.)
+* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes
+ when a directory was removed or replaced by another kind of file.
+ (Contributed by Petr Viktorin in :gh:`127987` and CVE 2024-12718.)
+* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
+ now (re-)apply the extraction filter when substituting a link (hard or
+ symbolic) with a copy of another archive member, and when fixing up
+ directory attributes.
+ The former raises a new exception, :exc:`~tarfile.LinkFallbackError`.
+ (Contributed by Petr Viktorin for CVE 2025-4330 and CVE 2024-12718.)
+* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
+ no longer extract rejected members when
+ :func:`~tarfile.TarFile.errorlevel` is zero.
+ (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887`
+ and CVE 2025-4435.)
diff --git a/Lib/genericpath.py b/Lib/genericpath.py
index ce36451..ad8d47b 100644
--- a/Lib/genericpath.py
+++ b/Lib/genericpath.py
@@ -8,7 +8,7 @@ import stat
__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime',
'getsize', 'isdir', 'isfile', 'samefile', 'sameopenfile',
- 'samestat']
+ 'samestat', 'ALLOW_MISSING']
# Does a path exist?
@@ -153,3 +153,12 @@ def _check_arg_types(funcname, *args):
f'os.PathLike object, not {s.__class__.__name__!r}') from None
if hasstr and hasbytes:
raise TypeError("Can't mix strings and bytes in path components") from None
+
+# A singleton with a true boolean value.
+@object.__new__
+class ALLOW_MISSING:
+ """Special value for use in realpath()."""
+ def __repr__(self):
+ return 'os.path.ALLOW_MISSING'
+ def __reduce__(self):
+ return self.__class__.__name__
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 0246419..ebc55eb 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -30,7 +30,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"ismount", "expanduser","expandvars","normpath","abspath",
"curdir","pardir","sep","pathsep","defpath","altsep",
"extsep","devnull","realpath","supports_unicode_filenames","relpath",
- "samefile", "sameopenfile", "samestat", "commonpath"]
+ "samefile", "sameopenfile", "samestat", "commonpath",
+ "ALLOW_MISSING"]
def _get_bothseps(path):
if isinstance(path, bytes):
@@ -578,9 +579,10 @@ try:
from nt import _getfinalpathname, readlink as _nt_readlink
except ImportError:
# realpath is a no-op on systems without _getfinalpathname support.
- realpath = abspath
+ def realpath(path, *, strict=False):
+ return abspath(path)
else:
- def _readlink_deep(path):
+ def _readlink_deep(path, ignored_error=OSError):
# These error codes indicate that we should stop reading links and
# return the path we currently have.
# 1: ERROR_INVALID_FUNCTION
@@ -613,7 +615,7 @@ else:
path = old_path
break
path = normpath(join(dirname(old_path), path))
- except OSError as ex:
+ except ignored_error as ex:
if ex.winerror in allowed_winerror:
break
raise
@@ -622,7 +624,7 @@ else:
break
return path
- def _getfinalpathname_nonstrict(path):
+ def _getfinalpathname_nonstrict(path, ignored_error=OSError):
# These error codes indicate that we should stop resolving the path
# and return the value we currently have.
# 1: ERROR_INVALID_FUNCTION
@@ -649,17 +651,18 @@ else:
try:
path = _getfinalpathname(path)
return join(path, tail) if tail else path
- except OSError as ex:
+ except ignored_error as ex:
if ex.winerror not in allowed_winerror:
raise
try:
# The OS could not resolve this path fully, so we attempt
# to follow the link ourselves. If we succeed, join the tail
# and return.
- new_path = _readlink_deep(path)
+ new_path = _readlink_deep(path,
+ ignored_error=ignored_error)
if new_path != path:
return join(new_path, tail) if tail else new_path
- except OSError:
+ except ignored_error:
# If we fail to readlink(), let's keep traversing
pass
path, name = split(path)
@@ -690,6 +693,15 @@ else:
if normcase(path) == normcase(devnull):
return '\\\\.\\NUL'
had_prefix = path.startswith(prefix)
+
+ if strict is ALLOW_MISSING:
+ ignored_error = FileNotFoundError
+ strict = True
+ elif strict:
+ ignored_error = ()
+ else:
+ ignored_error = OSError
+
if not had_prefix and not isabs(path):
path = join(cwd, path)
try:
@@ -697,17 +709,16 @@ else:
initial_winerror = 0
except ValueError as ex:
# gh-106242: Raised for embedded null characters
- # In strict mode, we convert into an OSError.
+ # In strict modes, we convert into an OSError.
# Non-strict mode returns the path as-is, since we've already
# made it absolute.
if strict:
raise OSError(str(ex)) from None
path = normpath(path)
- except OSError as ex:
- if strict:
- raise
+ except ignored_error as ex:
initial_winerror = ex.winerror
- path = _getfinalpathname_nonstrict(path)
+ path = _getfinalpathname_nonstrict(path,
+ ignored_error=ignored_error)
# The path returned by _getfinalpathname will always start with \\?\ -
# strip off that prefix unless it was already provided on the original
# path.
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 5b4d78b..ce71a47 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -35,7 +35,7 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"samefile","sameopenfile","samestat",
"curdir","pardir","sep","pathsep","defpath","altsep","extsep",
"devnull","realpath","supports_unicode_filenames","relpath",
- "commonpath"]
+ "commonpath", "ALLOW_MISSING"]
def _get_sep(path):
@@ -427,6 +427,15 @@ def _joinrealpath(path, rest, strict, seen):
sep = '/'
curdir = '.'
pardir = '..'
+ getcwd = os.getcwd
+ if strict is ALLOW_MISSING:
+ ignored_error = FileNotFoundError
+ elif strict:
+ ignored_error = ()
+ else:
+ ignored_error = OSError
+
+ maxlinks = None
if isabs(rest):
rest = rest[1:]
@@ -449,9 +458,7 @@ def _joinrealpath(path, rest, strict, seen):
newpath = join(path, name)
try:
st = os.lstat(newpath)
- except OSError:
- if strict:
- raise
+ except ignored_error:
is_link = False
else:
is_link = stat.S_ISLNK(st.st_mode)
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index b5fb5c8..44e69cd 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -758,10 +758,22 @@ class LinkOutsideDestinationError(FilterError):
super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
+ 'which is outside the destination')
+class LinkFallbackError(FilterError):
+ def __init__(self, tarinfo, path):
+ self.tarinfo = tarinfo
+ self._path = path
+ super().__init__(f'link {tarinfo.name!r} would be extracted as a '
+ + f'copy of {path!r}, which was rejected')
+
+# Errors caused by filters -- both "fatal" and "non-fatal" -- that
+# we consider to be issues with the argument, rather than a bug in the
+# filter function
+_FILTER_ERRORS = (FilterError, OSError, ExtractError)
+
def _get_filtered_attrs(member, dest_path, for_data=True):
new_attrs = {}
name = member.name
- dest_path = os.path.realpath(dest_path)
+ dest_path = os.path.realpath(dest_path, strict=os.path.ALLOW_MISSING)
# Strip leading / (tar's directory separator) from filenames.
# Include os.sep (target OS directory separator) as well.
if name.startswith(('/', os.sep)):
@@ -771,7 +783,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
# For example, 'C:/foo' on Windows.
raise AbsolutePathError(member)
# Ensure we stay in the destination
- target_path = os.path.realpath(os.path.join(dest_path, name))
+ target_path = os.path.realpath(os.path.join(dest_path, name),
+ strict=os.path.ALLOW_MISSING)
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise OutsideDestinationError(member, target_path)
# Limit permissions (no high bits, and go-w)
@@ -809,6 +822,9 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
if member.islnk() or member.issym():
if os.path.isabs(member.linkname):
raise AbsoluteLinkError(member)
+ normalized = os.path.normpath(member.linkname)
+ if normalized != member.linkname:
+ new_attrs['linkname'] = normalized
if member.issym():
target_path = os.path.join(dest_path,
os.path.dirname(name),
@@ -816,7 +832,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
else:
target_path = os.path.join(dest_path,
member.linkname)
- target_path = os.path.realpath(target_path)
+ target_path = os.path.realpath(target_path,
+ strict=os.path.ALLOW_MISSING)
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise LinkOutsideDestinationError(member, target_path)
return new_attrs
@@ -2325,30 +2342,58 @@ class TarFile(object):
members = self
for member in members:
- tarinfo = self._get_extract_tarinfo(member, filter_function, path)
+ tarinfo, unfiltered = self._get_extract_tarinfo(
+ member, filter_function, path)
if tarinfo is None:
continue
if tarinfo.isdir():
# For directories, delay setting attributes until later,
# since permissions can interfere with extraction and
# extracting contents can reset mtime.
- directories.append(tarinfo)
+ directories.append(unfiltered)
self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
- numeric_owner=numeric_owner)
+ numeric_owner=numeric_owner,
+ filter_function=filter_function)
# Reverse sort directories.
directories.sort(key=lambda a: a.name, reverse=True)
+
# Set correct owner, mtime and filemode on directories.
- for tarinfo in directories:
- dirpath = os.path.join(path, tarinfo.name)
+ for unfiltered in directories:
try:
+ # Need to re-apply any filter, to take the *current* filesystem
+ # state into account.
+ try:
+ tarinfo = filter_function(unfiltered, path)
+ except _FILTER_ERRORS as exc:
+ self._log_no_directory_fixup(unfiltered, repr(exc))
+ continue
+ if tarinfo is None:
+ self._log_no_directory_fixup(unfiltered,
+ 'excluded by filter')
+ continue
+ dirpath = os.path.join(path, tarinfo.name)
+ try:
+ lstat = os.lstat(dirpath)
+ except FileNotFoundError:
+ self._log_no_directory_fixup(tarinfo, 'missing')
+ continue
+ if not stat.S_ISDIR(lstat.st_mode):
+ # This is no longer a directory; presumably a later
+ # member overwrote the entry.
+ self._log_no_directory_fixup(tarinfo, 'not a directory')
+ continue
self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
self.utime(tarinfo, dirpath)
self.chmod(tarinfo, dirpath)
except ExtractError as e:
self._handle_nonfatal_error(e)
+ def _log_no_directory_fixup(self, member, reason):
+ self._dbg(2, "tarfile: Not fixing up directory %r (%s)" %
+ (member.name, reason))
+
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
filter=None):
"""Extract a member from the archive to the current working directory,
@@ -2364,41 +2409,56 @@ class TarFile(object):
String names of common filters are accepted.
"""
filter_function = self._get_filter_function(filter)
- tarinfo = self._get_extract_tarinfo(member, filter_function, path)
+ tarinfo, unfiltered = self._get_extract_tarinfo(
+ member, filter_function, path)
if tarinfo is not None:
self._extract_one(tarinfo, path, set_attrs, numeric_owner)
def _get_extract_tarinfo(self, member, filter_function, path):
- """Get filtered TarInfo (or None) from member, which might be a str"""
+ """Get (filtered, unfiltered) TarInfos from *member*
+
+ *member* might be a string.
+
+ Return (None, None) if not found.
+ """
+
if isinstance(member, str):
- tarinfo = self.getmember(member)
+ unfiltered = self.getmember(member)
else:
- tarinfo = member
+ unfiltered = member
- unfiltered = tarinfo
+ filtered = None
try:
- tarinfo = filter_function(tarinfo, path)
+ filtered = filter_function(unfiltered, path)
except (OSError, FilterError) as e:
self._handle_fatal_error(e)
except ExtractError as e:
self._handle_nonfatal_error(e)
- if tarinfo is None:
+ if filtered is None:
self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
- return None
+ return None, None
+
# Prepare the link target for makelink().
- if tarinfo.islnk():
- tarinfo = copy.copy(tarinfo)
- tarinfo._link_target = os.path.join(path, tarinfo.linkname)
- return tarinfo
+ if filtered.islnk():
+ filtered = copy.copy(filtered)
+ filtered._link_target = os.path.join(path, filtered.linkname)
+ return filtered, unfiltered
- def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
- """Extract from filtered tarinfo to disk"""
+ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner,
+ filter_function=None):
+ """Extract from filtered tarinfo to disk.
+
+ filter_function is only used when extracting a *different*
+ member (e.g. as fallback to creating a symlink)
+ """
self._check("r")
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
set_attrs=set_attrs,
- numeric_owner=numeric_owner)
+ numeric_owner=numeric_owner,
+ filter_function=filter_function,
+ extraction_root=path)
except OSError as e:
self._handle_fatal_error(e)
except ExtractError as e:
@@ -2456,9 +2516,13 @@ class TarFile(object):
return None
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
- numeric_owner=False):
- """Extract the TarInfo object tarinfo to a physical
+ numeric_owner=False, *, filter_function=None,
+ extraction_root=None):
+ """Extract the filtered TarInfo object tarinfo to a physical
file called targetpath.
+
+ filter_function is only used when extracting a *different*
+ member (e.g. as fallback to creating a symlink)
"""
# Fetch the TarInfo object for the given name
# and build the destination pathname, replacing
@@ -2487,7 +2551,10 @@ class TarFile(object):
elif tarinfo.ischr() or tarinfo.isblk():
self.makedev(tarinfo, targetpath)
elif tarinfo.islnk() or tarinfo.issym():
- self.makelink(tarinfo, targetpath)
+ self.makelink_with_filter(
+ tarinfo, targetpath,
+ filter_function=filter_function,
+ extraction_root=extraction_root)
elif tarinfo.type not in SUPPORTED_TYPES:
self.makeunknown(tarinfo, targetpath)
else:
@@ -2570,10 +2637,18 @@ class TarFile(object):
os.makedev(tarinfo.devmajor, tarinfo.devminor))
def makelink(self, tarinfo, targetpath):
+ return self.makelink_with_filter(tarinfo, targetpath, None, None)
+
+ def makelink_with_filter(self, tarinfo, targetpath,
+ filter_function, extraction_root):
"""Make a (symbolic) link called targetpath. If it cannot be created
(platform limitation), we try to make a copy of the referenced file
instead of a link.
+
+ filter_function is only used when extracting a *different*
+ member (e.g. as fallback to creating a link).
"""
+ keyerror_to_extracterror = False
try:
# For systems that support symbolic and hard links.
if tarinfo.issym():
@@ -2581,18 +2656,38 @@ class TarFile(object):
# Avoid FileExistsError on following os.symlink.
os.unlink(targetpath)
os.symlink(tarinfo.linkname, targetpath)
+ return
else:
if os.path.exists(tarinfo._link_target):
os.link(tarinfo._link_target, targetpath)
- else:
- self._extract_member(self._find_link_target(tarinfo),
- targetpath)
+ return
except symlink_exception:
+ keyerror_to_extracterror = True
+
+ try:
+ unfiltered = self._find_link_target(tarinfo)
+ except KeyError:
+ if keyerror_to_extracterror:
+ raise ExtractError(
+ "unable to resolve link inside archive") from None
+ else:
+ raise
+
+ if filter_function is None:
+ filtered = unfiltered
+ else:
+ if extraction_root is None:
+ raise ExtractError(
+ "makelink_with_filter: if filter_function is not None, "
+ + "extraction_root must also not be None")
try:
- self._extract_member(self._find_link_target(tarinfo),
- targetpath)
- except KeyError:
- raise ExtractError("unable to resolve link inside archive") from None
+ filtered = filter_function(unfiltered, extraction_root)
+ except _FILTER_ERRORS as cause:
+ raise LinkFallbackError(tarinfo, unfiltered.name) from cause
+ if filtered is not None:
+ self._extract_member(filtered, targetpath,
+ filter_function=filter_function,
+ extraction_root=extraction_root)
def chown(self, tarinfo, targetpath, numeric_owner):
"""Set owner of targetpath according to tarinfo. If numeric_owner
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 88660fc..7d0c0a0 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -1,9 +1,11 @@
import ntpath
import os
import string
+import subprocess
import sys
import unittest
import warnings
+from ntpath import ALLOW_MISSING
from test.support import os_helper
from test.support import TestFailed, is_emscripten
from test.support.os_helper import FakePath
@@ -75,6 +77,27 @@ def tester(fn, wantResult):
%(str(fn), str(wantResult), repr(gotResult)))
+def _parameterize(*parameters):
+ """Simplistic decorator to parametrize a test
+
+ Runs the decorated test multiple times in subTest, with a value from
+ 'parameters' passed as an extra positional argument.
+ Calls doCleanups() after each run.
+
+ Not for general use. Intended to avoid indenting for easier backports.
+
+ See https://discuss.python.org/t/91827 for discussing generalizations.
+ """
+ def _parametrize_decorator(func):
+ def _parameterized(self, *args, **kwargs):
+ for parameter in parameters:
+ with self.subTest(parameter):
+ func(self, *args, parameter, **kwargs)
+ self.doCleanups()
+ return _parameterized
+ return _parametrize_decorator
+
+
class NtpathTestCase(unittest.TestCase):
def assertPathEqual(self, path1, path2):
if path1 == path2 or _norm(path1) == _norm(path2):
@@ -297,6 +320,27 @@ class TestNtpath(NtpathTestCase):
tester("ntpath.realpath('.\\.')", expected)
tester("ntpath.realpath('\\'.join(['.'] * 100))", expected)
+ def test_realpath_curdir_strict(self):
+ expected = ntpath.normpath(os.getcwd())
+ tester("ntpath.realpath('.', strict=True)", expected)
+ tester("ntpath.realpath('./.', strict=True)", expected)
+ tester("ntpath.realpath('/'.join(['.'] * 100), strict=True)", expected)
+ tester("ntpath.realpath('.\\.', strict=True)", expected)
+ tester("ntpath.realpath('\\'.join(['.'] * 100), strict=True)", expected)
+
+ def test_realpath_curdir_missing_ok(self):
+ expected = ntpath.normpath(os.getcwd())
+ tester("ntpath.realpath('.', strict=ALLOW_MISSING)",
+ expected)
+ tester("ntpath.realpath('./.', strict=ALLOW_MISSING)",
+ expected)
+ tester("ntpath.realpath('/'.join(['.'] * 100), strict=ALLOW_MISSING)",
+ expected)
+ tester("ntpath.realpath('.\\.', strict=ALLOW_MISSING)",
+ expected)
+ tester("ntpath.realpath('\\'.join(['.'] * 100), strict=ALLOW_MISSING)",
+ expected)
+
def test_realpath_pardir(self):
expected = ntpath.normpath(os.getcwd())
tester("ntpath.realpath('..')", ntpath.dirname(expected))
@@ -309,17 +353,43 @@ class TestNtpath(NtpathTestCase):
tester("ntpath.realpath('\\'.join(['..'] * 50))",
ntpath.splitdrive(expected)[0] + '\\')
+ def test_realpath_pardir_strict(self):
+ expected = ntpath.normpath(os.getcwd())
+ tester("ntpath.realpath('..', strict=True)", ntpath.dirname(expected))
+ tester("ntpath.realpath('../..', strict=True)",
+ ntpath.dirname(ntpath.dirname(expected)))
+ tester("ntpath.realpath('/'.join(['..'] * 50), strict=True)",
+ ntpath.splitdrive(expected)[0] + '\\')
+ tester("ntpath.realpath('..\\..', strict=True)",
+ ntpath.dirname(ntpath.dirname(expected)))
+ tester("ntpath.realpath('\\'.join(['..'] * 50), strict=True)",
+ ntpath.splitdrive(expected)[0] + '\\')
+
+ def test_realpath_pardir_missing_ok(self):
+ expected = ntpath.normpath(os.getcwd())
+ tester("ntpath.realpath('..', strict=ALLOW_MISSING)",
+ ntpath.dirname(expected))
+ tester("ntpath.realpath('../..', strict=ALLOW_MISSING)",
+ ntpath.dirname(ntpath.dirname(expected)))
+ tester("ntpath.realpath('/'.join(['..'] * 50), strict=ALLOW_MISSING)",
+ ntpath.splitdrive(expected)[0] + '\\')
+ tester("ntpath.realpath('..\\..', strict=ALLOW_MISSING)",
+ ntpath.dirname(ntpath.dirname(expected)))
+ tester("ntpath.realpath('\\'.join(['..'] * 50), strict=ALLOW_MISSING)",
+ ntpath.splitdrive(expected)[0] + '\\')
+
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
- def test_realpath_basic(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_basic(self, kwargs):
ABSTFN = ntpath.abspath(os_helper.TESTFN)
open(ABSTFN, "wb").close()
self.addCleanup(os_helper.unlink, ABSTFN)
self.addCleanup(os_helper.unlink, ABSTFN + "1")
os.symlink(ABSTFN, ABSTFN + "1")
- self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
- self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN)
+ self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1"), **kwargs),
os.fsencode(ABSTFN))
# gh-88013: call ntpath.realpath with binary drive name may raise a
@@ -333,8 +403,13 @@ class TestNtpath(NtpathTestCase):
self.assertEqual(ntpath.realpath(d), d)
# gh-106242: Embedded nulls and non-strict fallback to abspath
- self.assertEqual(ABSTFN + "\0spam",
- ntpath.realpath(os_helper.TESTFN + "\0spam", strict=False))
+ if kwargs:
+ with self.assertRaises(OSError):
+ ntpath.realpath(os_helper.TESTFN + "\0spam",
+ **kwargs)
+ else:
+ self.assertEqual(ABSTFN + "\0spam",
+ ntpath.realpath(os_helper.TESTFN + "\0spam", **kwargs))
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
@@ -346,19 +421,77 @@ class TestNtpath(NtpathTestCase):
self.addCleanup(os_helper.unlink, ABSTFN)
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True)
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True)
+
+ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+ def test_realpath_invalid_paths(self):
+ realpath = ntpath.realpath
+ ABSTFN = ntpath.abspath(os_helper.TESTFN)
+ ABSTFNb = os.fsencode(ABSTFN)
+ path = ABSTFN + '\x00'
+ # gh-106242: Embedded nulls and non-strict fallback to abspath
+ self.assertEqual(realpath(path, strict=False), path)
# gh-106242: Embedded nulls should raise OSError (not ValueError)
- self.assertRaises(OSError, ntpath.realpath, ABSTFN + "\0spam", strict=True)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING)
+ path = ABSTFNb + b'\x00'
+ self.assertEqual(realpath(path, strict=False), path)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING)
+ path = ABSTFN + '\\nonexistent\\x\x00'
+ self.assertEqual(realpath(path, strict=False), path)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING)
+ path = ABSTFNb + b'\\nonexistent\\x\x00'
+ self.assertEqual(realpath(path, strict=False), path)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING)
+ path = ABSTFN + '\x00\\..'
+ self.assertEqual(realpath(path, strict=False), os.getcwd())
+ self.assertEqual(realpath(path, strict=True), os.getcwd())
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwd())
+ path = ABSTFNb + b'\x00\\..'
+ self.assertEqual(realpath(path, strict=False), os.getcwdb())
+ self.assertEqual(realpath(path, strict=True), os.getcwdb())
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwdb())
+ path = ABSTFN + '\\nonexistent\\x\x00\\..'
+ self.assertEqual(realpath(path, strict=False), ABSTFN + '\\nonexistent')
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFN + '\\nonexistent')
+ path = ABSTFNb + b'\\nonexistent\\x\x00\\..'
+ self.assertEqual(realpath(path, strict=False), ABSTFNb + b'\\nonexistent')
+ self.assertRaises(OSError, ntpath.realpath, path, strict=True)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFNb + b'\\nonexistent')
+
+ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_invalid_unicode_paths(self, kwargs):
+ realpath = ntpath.realpath
+ ABSTFN = ntpath.abspath(os_helper.TESTFN)
+ ABSTFNb = os.fsencode(ABSTFN)
+ path = ABSTFNb + b'\xff'
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ path = ABSTFNb + b'\\nonexistent\\\xff'
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ path = ABSTFNb + b'\xff\\..'
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ path = ABSTFNb + b'\\nonexistent\\\xff\\..'
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
+ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs)
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
- def test_realpath_relative(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_relative(self, kwargs):
ABSTFN = ntpath.abspath(os_helper.TESTFN)
open(ABSTFN, "wb").close()
self.addCleanup(os_helper.unlink, ABSTFN)
self.addCleanup(os_helper.unlink, ABSTFN + "1")
os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1"))
- self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN)
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
@@ -510,7 +643,62 @@ class TestNtpath(NtpathTestCase):
@os_helper.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
- def test_realpath_symlink_prefix(self):
+ def test_realpath_symlink_loops_raise(self):
+ # Symlink loops raise OSError in ALLOW_MISSING mode
+ ABSTFN = ntpath.abspath(os_helper.TESTFN)
+ self.addCleanup(os_helper.unlink, ABSTFN)
+ self.addCleanup(os_helper.unlink, ABSTFN + "1")
+ self.addCleanup(os_helper.unlink, ABSTFN + "2")
+ self.addCleanup(os_helper.unlink, ABSTFN + "y")
+ self.addCleanup(os_helper.unlink, ABSTFN + "c")
+ self.addCleanup(os_helper.unlink, ABSTFN + "a")
+ self.addCleanup(os_helper.unlink, ABSTFN + "x")
+
+ os.symlink(ABSTFN, ABSTFN)
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=ALLOW_MISSING)
+
+ os.symlink(ABSTFN + "1", ABSTFN + "2")
+ os.symlink(ABSTFN + "2", ABSTFN + "1")
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1",
+ strict=ALLOW_MISSING)
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2",
+ strict=ALLOW_MISSING)
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x",
+ strict=ALLOW_MISSING)
+
+ # Windows eliminates '..' components before resolving links;
+ # realpath is not expected to raise if this removes the loop.
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\.."),
+ ntpath.dirname(ABSTFN))
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
+ ntpath.dirname(ABSTFN) + "\\x")
+
+ os.symlink(ABSTFN + "x", ABSTFN + "y")
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\"
+ + ntpath.basename(ABSTFN) + "y"),
+ ABSTFN + "x")
+ self.assertRaises(
+ OSError, ntpath.realpath,
+ ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1",
+ strict=ALLOW_MISSING)
+
+ os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a",
+ strict=ALLOW_MISSING)
+
+ os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
+ + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
+ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c",
+ strict=ALLOW_MISSING)
+
+ # Test using relative path as well.
+ self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN),
+ strict=ALLOW_MISSING)
+
+ @os_helper.skip_unless_symlink
+ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_symlink_prefix(self, kwargs):
ABSTFN = ntpath.abspath(os_helper.TESTFN)
self.addCleanup(os_helper.unlink, ABSTFN + "3")
self.addCleanup(os_helper.unlink, "\\\\?\\" + ABSTFN + "3.")
@@ -525,9 +713,9 @@ class TestNtpath(NtpathTestCase):
f.write(b'1')
os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link")
- self.assertPathEqual(ntpath.realpath(ABSTFN + "3link"),
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "3link", **kwargs),
ABSTFN + "3")
- self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link"),
+ self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link", **kwargs),
"\\\\?\\" + ABSTFN + "3.")
# Resolved paths should be usable to open target files
@@ -537,14 +725,17 @@ class TestNtpath(NtpathTestCase):
self.assertEqual(f.read(), b'1')
# When the prefix is included, it is not stripped
- self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"),
+ self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link", **kwargs),
"\\\\?\\" + ABSTFN + "3")
- self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"),
+ self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link", **kwargs),
"\\\\?\\" + ABSTFN + "3.")
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_nul(self):
tester("ntpath.realpath('NUL')", r'\\.\NUL')
+ tester("ntpath.realpath('NUL', strict=False)", r'\\.\NUL')
+ tester("ntpath.realpath('NUL', strict=True)", r'\\.\NUL')
+ tester("ntpath.realpath('NUL', strict=ALLOW_MISSING)", r'\\.\NUL')
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
@unittest.skipUnless(HAVE_GETSHORTPATHNAME, 'need _getshortpathname')
@@ -568,12 +759,20 @@ class TestNtpath(NtpathTestCase):
self.assertPathEqual(test_file_long, ntpath.realpath(test_file_short))
- with os_helper.change_cwd(test_dir_long):
- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt"))
- with os_helper.change_cwd(test_dir_long.lower()):
- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt"))
- with os_helper.change_cwd(test_dir_short):
- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt"))
+ for kwargs in {}, {'strict': True}, {'strict': ALLOW_MISSING}:
+ with self.subTest(**kwargs):
+ with os_helper.change_cwd(test_dir_long):
+ self.assertPathEqual(
+ test_file_long,
+ ntpath.realpath("file.txt", **kwargs))
+ with os_helper.change_cwd(test_dir_long.lower()):
+ self.assertPathEqual(
+ test_file_long,
+ ntpath.realpath("file.txt", **kwargs))
+ with os_helper.change_cwd(test_dir_short):
+ self.assertPathEqual(
+ test_file_long,
+ ntpath.realpath("file.txt", **kwargs))
def test_expandvars(self):
with os_helper.EnvironmentVarGuard() as env:
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
index 8a1dd13..3774b75 100644
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -2,7 +2,9 @@ import os
import posixpath
import sys
import unittest
-from posixpath import realpath, abspath, dirname, basename
+from functools import partial
+from posixpath import realpath, abspath, dirname, basename, ALLOW_MISSING
+from test import support
from test import test_genericpath
from test.support import import_helper
from test.support import os_helper
@@ -36,6 +38,26 @@ def safe_rmdir(dirname):
except OSError:
pass
+def _parameterize(*parameters):
+ """Simplistic decorator to parametrize a test
+
+ Runs the decorated test multiple times in subTest, with a value from
+ 'parameters' passed as an extra positional argument.
+ Does *not* call doCleanups() after each run.
+
+ Not for general use. Intended to avoid indenting for easier backports.
+
+ See https://discuss.python.org/t/91827 for discussing generalizations.
+ """
+ def _parametrize_decorator(func):
+ def _parameterized(self, *args, **kwargs):
+ for parameter in parameters:
+ with self.subTest(parameter):
+ func(self, *args, parameter, **kwargs)
+ return _parameterized
+ return _parametrize_decorator
+
+
class PosixPathTest(unittest.TestCase):
def setUp(self):
@@ -369,32 +391,35 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(result, expected)
@skip_if_ABSTFN_contains_backslash
- def test_realpath_curdir(self):
- self.assertEqual(realpath('.'), os.getcwd())
- self.assertEqual(realpath('./.'), os.getcwd())
- self.assertEqual(realpath('/'.join(['.'] * 100)), os.getcwd())
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_curdir(self, kwargs):
+ self.assertEqual(realpath('.', **kwargs), os.getcwd())
+ self.assertEqual(realpath('./.', **kwargs), os.getcwd())
+ self.assertEqual(realpath('/'.join(['.'] * 100), **kwargs), os.getcwd())
- self.assertEqual(realpath(b'.'), os.getcwdb())
- self.assertEqual(realpath(b'./.'), os.getcwdb())
- self.assertEqual(realpath(b'/'.join([b'.'] * 100)), os.getcwdb())
+ self.assertEqual(realpath(b'.', **kwargs), os.getcwdb())
+ self.assertEqual(realpath(b'./.', **kwargs), os.getcwdb())
+ self.assertEqual(realpath(b'/'.join([b'.'] * 100), **kwargs), os.getcwdb())
@skip_if_ABSTFN_contains_backslash
- def test_realpath_pardir(self):
- self.assertEqual(realpath('..'), dirname(os.getcwd()))
- self.assertEqual(realpath('../..'), dirname(dirname(os.getcwd())))
- self.assertEqual(realpath('/'.join(['..'] * 100)), '/')
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_pardir(self, kwargs):
+ self.assertEqual(realpath('..', **kwargs), dirname(os.getcwd()))
+ self.assertEqual(realpath('../..', **kwargs), dirname(dirname(os.getcwd())))
+ self.assertEqual(realpath('/'.join(['..'] * 100), **kwargs), '/')
- self.assertEqual(realpath(b'..'), dirname(os.getcwdb()))
- self.assertEqual(realpath(b'../..'), dirname(dirname(os.getcwdb())))
- self.assertEqual(realpath(b'/'.join([b'..'] * 100)), b'/')
+ self.assertEqual(realpath(b'..', **kwargs), dirname(os.getcwdb()))
+ self.assertEqual(realpath(b'../..', **kwargs), dirname(dirname(os.getcwdb())))
+ self.assertEqual(realpath(b'/'.join([b'..'] * 100), **kwargs), b'/')
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_basic(self):
+ @_parameterize({}, {'strict': ALLOW_MISSING})
+ def test_realpath_basic(self, kwargs):
# Basic operation.
try:
os.symlink(ABSTFN+"1", ABSTFN)
- self.assertEqual(realpath(ABSTFN), ABSTFN+"1")
+ self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1")
finally:
os_helper.unlink(ABSTFN)
@@ -410,15 +435,122 @@ class PosixPathTest(unittest.TestCase):
finally:
os_helper.unlink(ABSTFN)
+ def test_realpath_invalid_paths(self):
+ path = '/\x00'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(ValueError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = b'/\x00'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(ValueError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = '/nonexistent/x\x00'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = b'/nonexistent/x\x00'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = '/\x00/..'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(ValueError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = b'/\x00/..'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(ValueError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+
+ path = '/nonexistent/x\x00/..'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+ path = b'/nonexistent/x\x00/..'
+ self.assertRaises(ValueError, realpath, path, strict=False)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING)
+
+ path = '/\udfff'
+ if sys.platform == 'win32':
+ self.assertEqual(realpath(path, strict=False), path)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path)
+ else:
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=True)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING)
+ path = '/nonexistent/\udfff'
+ if sys.platform == 'win32':
+ self.assertEqual(realpath(path, strict=False), path)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path)
+ else:
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ path = '/\udfff/..'
+ if sys.platform == 'win32':
+ self.assertEqual(realpath(path, strict=False), '/')
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/')
+ else:
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=True)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING)
+ path = '/nonexistent/\udfff/..'
+ if sys.platform == 'win32':
+ self.assertEqual(realpath(path, strict=False), '/nonexistent')
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/nonexistent')
+ else:
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING)
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+
+ path = b'/\xff'
+ if sys.platform == 'win32':
+ self.assertRaises(UnicodeDecodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeDecodeError, realpath, path, strict=True)
+ self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING)
+ else:
+ self.assertEqual(realpath(path, strict=False), path)
+ if support.is_wasi:
+ self.assertRaises(OSError, realpath, path, strict=True)
+ self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING)
+ else:
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path)
+ path = b'/nonexistent/\xff'
+ if sys.platform == 'win32':
+ self.assertRaises(UnicodeDecodeError, realpath, path, strict=False)
+ self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING)
+ else:
+ self.assertEqual(realpath(path, strict=False), path)
+ if support.is_wasi:
+ self.assertRaises(OSError, realpath, path, strict=True)
+ self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING)
+ else:
+ self.assertRaises(FileNotFoundError, realpath, path, strict=True)
+
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_relative(self):
+ @_parameterize({}, {'strict': ALLOW_MISSING})
+ def test_realpath_relative(self, kwargs):
try:
os.symlink(posixpath.relpath(ABSTFN+"1"), ABSTFN)
- self.assertEqual(realpath(ABSTFN), ABSTFN+"1")
+ self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1")
finally:
os_helper.unlink(ABSTFN)
+ @os_helper.skip_unless_symlink
+ @skip_if_ABSTFN_contains_backslash
+ @_parameterize({}, {'strict': ALLOW_MISSING})
+ def test_realpath_missing_pardir(self, kwargs):
+ try:
+ os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
+ self.assertEqual(
+ realpath("nonexistent/../" + os_helper.TESTFN, **kwargs), ABSTFN + "1")
+ finally:
+ os_helper.unlink(os_helper.TESTFN)
+
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
def test_realpath_symlink_loops(self):
@@ -462,37 +594,38 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_symlink_loops_strict(self):
+ @_parameterize({'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_symlink_loops_strict(self, kwargs):
# Bug #43757, raise OSError if we get into an infinite symlink loop in
- # strict mode.
+ # the strict modes.
try:
os.symlink(ABSTFN, ABSTFN)
- self.assertRaises(OSError, realpath, ABSTFN, strict=True)
+ self.assertRaises(OSError, realpath, ABSTFN, **kwargs)
os.symlink(ABSTFN+"1", ABSTFN+"2")
os.symlink(ABSTFN+"2", ABSTFN+"1")
- self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True)
- self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True)
+ self.assertRaises(OSError, realpath, ABSTFN+"1", **kwargs)
+ self.assertRaises(OSError, realpath, ABSTFN+"2", **kwargs)
- self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True)
- self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True)
- self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True)
+ self.assertRaises(OSError, realpath, ABSTFN+"1/x", **kwargs)
+ self.assertRaises(OSError, realpath, ABSTFN+"1/..", **kwargs)
+ self.assertRaises(OSError, realpath, ABSTFN+"1/../x", **kwargs)
os.symlink(ABSTFN+"x", ABSTFN+"y")
self.assertRaises(OSError, realpath,
- ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True)
+ ABSTFN+"1/../" + basename(ABSTFN) + "y", **kwargs)
self.assertRaises(OSError, realpath,
- ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True)
+ ABSTFN+"1/../" + basename(ABSTFN) + "1", **kwargs)
os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a")
- self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True)
+ self.assertRaises(OSError, realpath, ABSTFN+"a", **kwargs)
os.symlink("../" + basename(dirname(ABSTFN)) + "/" +
basename(ABSTFN) + "c", ABSTFN+"c")
- self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True)
+ self.assertRaises(OSError, realpath, ABSTFN+"c", **kwargs)
# Test using relative path as well.
with os_helper.change_cwd(dirname(ABSTFN)):
- self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True)
+ self.assertRaises(OSError, realpath, basename(ABSTFN), **kwargs)
finally:
os_helper.unlink(ABSTFN)
os_helper.unlink(ABSTFN+"1")
@@ -503,13 +636,14 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_repeated_indirect_symlinks(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_repeated_indirect_symlinks(self, kwargs):
# Issue #6975.
try:
os.mkdir(ABSTFN)
os.symlink('../' + basename(ABSTFN), ABSTFN + '/self')
os.symlink('self/self/self', ABSTFN + '/link')
- self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN)
+ self.assertEqual(realpath(ABSTFN + '/link', **kwargs), ABSTFN)
finally:
os_helper.unlink(ABSTFN + '/self')
os_helper.unlink(ABSTFN + '/link')
@@ -517,14 +651,15 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_deep_recursion(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_deep_recursion(self, kwargs):
depth = 10
try:
os.mkdir(ABSTFN)
for i in range(depth):
os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1))
os.symlink('.', ABSTFN + '/0')
- self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN)
+ self.assertEqual(realpath(ABSTFN + '/%d' % depth, **kwargs), ABSTFN)
# Test using relative path as well.
with os_helper.change_cwd(ABSTFN):
@@ -536,7 +671,8 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_resolve_parents(self):
+ @_parameterize({}, {'strict': ALLOW_MISSING})
+ def test_realpath_resolve_parents(self, kwargs):
# We also need to resolve any symlinks in the parents of a relative
# path passed to realpath. E.g.: current working directory is
# /usr/doc with 'doc' being a symlink to /usr/share/doc. We call
@@ -547,7 +683,8 @@ class PosixPathTest(unittest.TestCase):
os.symlink(ABSTFN + "/y", ABSTFN + "/k")
with os_helper.change_cwd(ABSTFN + "/k"):
- self.assertEqual(realpath("a"), ABSTFN + "/y/a")
+ self.assertEqual(realpath("a", **kwargs),
+ ABSTFN + "/y/a")
finally:
os_helper.unlink(ABSTFN + "/k")
safe_rmdir(ABSTFN + "/y")
@@ -555,7 +692,8 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_resolve_before_normalizing(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_resolve_before_normalizing(self, kwargs):
# Bug #990669: Symbolic links should be resolved before we
# normalize the path. E.g.: if we have directories 'a', 'k' and 'y'
# in the following hierarchy:
@@ -570,10 +708,10 @@ class PosixPathTest(unittest.TestCase):
os.symlink(ABSTFN + "/k/y", ABSTFN + "/link-y")
# Absolute path.
- self.assertEqual(realpath(ABSTFN + "/link-y/.."), ABSTFN + "/k")
+ self.assertEqual(realpath(ABSTFN + "/link-y/..", **kwargs), ABSTFN + "/k")
# Relative path.
with os_helper.change_cwd(dirname(ABSTFN)):
- self.assertEqual(realpath(basename(ABSTFN) + "/link-y/.."),
+ self.assertEqual(realpath(basename(ABSTFN) + "/link-y/..", **kwargs),
ABSTFN + "/k")
finally:
os_helper.unlink(ABSTFN + "/link-y")
@@ -583,7 +721,8 @@ class PosixPathTest(unittest.TestCase):
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
- def test_realpath_resolve_first(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_resolve_first(self, kwargs):
# Bug #1213894: The first component of the path, if not absolute,
# must be resolved too.
@@ -593,13 +732,70 @@ class PosixPathTest(unittest.TestCase):
os.symlink(ABSTFN, ABSTFN + "link")
with os_helper.change_cwd(dirname(ABSTFN)):
base = basename(ABSTFN)
- self.assertEqual(realpath(base + "link"), ABSTFN)
- self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k")
+ self.assertEqual(realpath(base + "link", **kwargs), ABSTFN)
+ self.assertEqual(realpath(base + "link/k", **kwargs), ABSTFN + "/k")
finally:
os_helper.unlink(ABSTFN + "link")
safe_rmdir(ABSTFN + "/k")
safe_rmdir(ABSTFN)
+ @os_helper.skip_unless_symlink
+ @skip_if_ABSTFN_contains_backslash
+ @unittest.skipIf(os.chmod not in os.supports_follow_symlinks, "Can't set symlink permissions")
+ @unittest.skipIf(sys.platform != "darwin", "only macOS requires read permission to readlink()")
+ @_parameterize({'strict': True}, {'strict': ALLOW_MISSING})
+ def test_realpath_unreadable_symlink_strict(self, kwargs):
+ try:
+ os.symlink(ABSTFN+"1", ABSTFN)
+ os.chmod(ABSTFN, 0o000, follow_symlinks=False)
+ with self.assertRaises(PermissionError):
+ realpath(ABSTFN, **kwargs)
+ with self.assertRaises(PermissionError):
+ realpath(ABSTFN + '/foo', **kwargs),
+ with self.assertRaises(PermissionError):
+ realpath(ABSTFN + '/../foo', **kwargs)
+ with self.assertRaises(PermissionError):
+ realpath(ABSTFN + '/foo/..', **kwargs)
+ finally:
+ os.chmod(ABSTFN, 0o755, follow_symlinks=False)
+ os.unlink(ABSTFN)
+
+ @skip_if_ABSTFN_contains_backslash
+ @os_helper.skip_unless_symlink
+ def test_realpath_unreadable_directory(self):
+ try:
+ os.mkdir(ABSTFN)
+ os.mkdir(ABSTFN + '/k')
+ os.chmod(ABSTFN, 0o000)
+ self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN)
+ self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN)
+ self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN)
+
+ try:
+ os.stat(ABSTFN)
+ except PermissionError:
+ pass
+ else:
+ self.skipTest('Cannot block permissions')
+
+ self.assertEqual(realpath(ABSTFN + '/k', strict=False),
+ ABSTFN + '/k')
+ self.assertRaises(PermissionError, realpath, ABSTFN + '/k',
+ strict=True)
+ self.assertRaises(PermissionError, realpath, ABSTFN + '/k',
+ strict=ALLOW_MISSING)
+
+ self.assertEqual(realpath(ABSTFN + '/missing', strict=False),
+ ABSTFN + '/missing')
+ self.assertRaises(PermissionError, realpath, ABSTFN + '/missing',
+ strict=True)
+ self.assertRaises(PermissionError, realpath, ABSTFN + '/missing',
+ strict=ALLOW_MISSING)
+ finally:
+ os.chmod(ABSTFN, 0o755)
+ safe_rmdir(ABSTFN + '/k')
+ safe_rmdir(ABSTFN)
+
def test_relpath(self):
(real_getcwd, os.getcwd) = (os.getcwd, lambda: r"/home/user/bar")
try:
@@ -776,9 +972,12 @@ class PathLikeTests(unittest.TestCase):
def test_path_abspath(self):
self.assertPathEqual(self.path.abspath)
- def test_path_realpath(self):
+ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING})
+ def test_path_realpath(self, kwargs):
self.assertPathEqual(self.path.realpath)
+ self.assertPathEqual(partial(self.path.realpath, **kwargs))
+
def test_path_relpath(self):
self.assertPathEqual(self.path.relpath)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 0fce8c4..4581682 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2533,7 +2533,7 @@ class MiscTest(unittest.TestCase):
"fully_trusted_filter", "data_filter",
"tar_filter", "FilterError", "AbsoluteLinkError",
"OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
- "LinkOutsideDestinationError",
+ "LinkOutsideDestinationError", "LinkFallbackError",
}
support.check__all__(self, tarfile, not_exported=not_exported)
@@ -2549,6 +2549,31 @@ class MiscTest(unittest.TestCase):
str(excinfo.exception),
)
+ @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support')
+ @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod")
+ @unittest.mock.patch('os.chmod')
+ def test_deferred_directory_attributes_update(self, mock_chmod):
+ # Regression test for gh-127987: setting attributes on arbitrary files
+ tempdir = os.path.join(TEMPDIR, 'test127987')
+ def mock_chmod_side_effect(path, mode, **kwargs):
+ target_path = os.path.realpath(path)
+ if os.path.commonpath([target_path, tempdir]) != tempdir:
+ raise Exception("should not try to chmod anything outside the destination", target_path)
+ mock_chmod.side_effect = mock_chmod_side_effect
+
+ outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir')
+ with ArchiveMaker() as arc:
+ arc.add('x', symlink_to='.')
+ arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt')
+ arc.add('x', symlink_to=outside_tree_dir)
+
+ os.makedirs(outside_tree_dir)
+ try:
+ arc.open().extractall(path=tempdir, filter='tar')
+ finally:
+ os_helper.rmtree(outside_tree_dir)
+ os_helper.rmtree(tempdir)
+
class CommandLineTest(unittest.TestCase):
@@ -3109,6 +3134,10 @@ class NoneInfoExtractTests(ReadTest):
got_paths = set(
p.relative_to(directory)
for p in pathlib.Path(directory).glob('**/*'))
+ if self.extraction_filter == 'data':
+ # The 'data' filter is expected to reject special files
+ for path in 'ustar/fifotype', 'ustar/blktype', 'ustar/chrtype':
+ got_paths.discard(pathlib.Path(path))
self.assertEqual(self.control_paths, got_paths)
@contextmanager
@@ -3335,12 +3364,28 @@ class ArchiveMaker:
self.bio = None
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
- mode=None, size=None, **kwargs):
- """Add a member to the test archive. Call within `with`."""
+ mode=None, size=None, content=None, **kwargs):
+ """Add a member to the test archive. Call within `with`.
+
+ Provides many shortcuts:
+ - default `type` is based on symlink_to, hardlink_to, and trailing `/`
+ in name (which is stripped)
+ - size & content defaults are based on each other
+ - content can be str or bytes
+ - mode should be textual ('-rwxrwxrwx')
+
+ (add more! this is unstable internal test-only API)
+ """
name = str(name)
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
+ if content is not None:
+ if isinstance(content, str):
+ content = content.encode()
+ size = len(content)
if size is not None:
tarinfo.size = size
+ if content is None:
+ content = bytes(tarinfo.size)
if mode:
tarinfo.mode = _filemode_to_int(mode)
if symlink_to is not None:
@@ -3354,7 +3399,7 @@ class ArchiveMaker:
if type is not None:
tarinfo.type = type
if tarinfo.isreg():
- fileobj = io.BytesIO(bytes(tarinfo.size))
+ fileobj = io.BytesIO(content)
else:
fileobj = None
self.tar_w.addfile(tarinfo, fileobj)
@@ -3388,7 +3433,7 @@ class TestExtractionFilters(unittest.TestCase):
destdir = outerdir / 'dest'
@contextmanager
- def check_context(self, tar, filter):
+ def check_context(self, tar, filter, *, check_flag=True):
"""Extracts `tar` to `self.destdir` and allows checking the result
If an error occurs, it must be checked using `expect_exception`
@@ -3397,27 +3442,40 @@ class TestExtractionFilters(unittest.TestCase):
except the destination directory itself and parent directories of
other files.
When checking directories, do so before their contents.
+
+ A file called 'flag' is made in outerdir (i.e. outside destdir)
+ before extraction; it should not be altered nor should its contents
+ be read/copied.
"""
with os_helper.temp_dir(self.outerdir):
+ flag_path = self.outerdir / 'flag'
+ flag_path.write_text('capture me')
try:
tar.extractall(self.destdir, filter=filter)
except Exception as exc:
self.raised_exception = exc
+ self.reraise_exception = True
self.expected_paths = set()
else:
self.raised_exception = None
+ self.reraise_exception = False
self.expected_paths = set(self.outerdir.glob('**/*'))
self.expected_paths.discard(self.destdir)
+ self.expected_paths.discard(flag_path)
try:
- yield
+ yield self
finally:
tar.close()
- if self.raised_exception:
+ if self.reraise_exception:
raise self.raised_exception
self.assertEqual(self.expected_paths, set())
+ if check_flag:
+ self.assertEqual(flag_path.read_text(), 'capture me')
+ else:
+ assert filter == 'fully_trusted'
def expect_file(self, name, type=None, symlink_to=None, mode=None,
- size=None):
+ size=None, content=None):
"""Check a single file. See check_context."""
if self.raised_exception:
raise self.raised_exception
@@ -3436,26 +3494,45 @@ class TestExtractionFilters(unittest.TestCase):
# The symlink might be the same (textually) as what we expect,
# but some systems change the link to an equivalent path, so
# we fall back to samefile().
- if expected != got:
- self.assertTrue(got.samefile(expected))
+ try:
+ if expected != got:
+ self.assertTrue(got.samefile(expected))
+ except Exception as e:
+ # attach a note, so it's shown even if `samefile` fails
+ e.add_note(f'{expected=}, {got=}')
+ raise
elif type == tarfile.REGTYPE or type is None:
self.assertTrue(path.is_file())
elif type == tarfile.DIRTYPE:
self.assertTrue(path.is_dir())
elif type == tarfile.FIFOTYPE:
self.assertTrue(path.is_fifo())
+ elif type == tarfile.SYMTYPE:
+ self.assertTrue(path.is_symlink())
else:
raise NotImplementedError(type)
if size is not None:
self.assertEqual(path.stat().st_size, size)
+ if content is not None:
+ self.assertEqual(path.read_text(), content)
for parent in path.parents:
self.expected_paths.discard(parent)
+ def expect_any_tree(self, name):
+ """Check a directory; forget about its contents."""
+ tree_path = (self.destdir / name).resolve()
+ self.expect_file(tree_path, type=tarfile.DIRTYPE)
+ self.expected_paths = {
+ p for p in self.expected_paths
+ if tree_path not in p.parents
+ }
+
def expect_exception(self, exc_type, message_re='.'):
with self.assertRaisesRegex(exc_type, message_re):
if self.raised_exception is not None:
raise self.raised_exception
- self.raised_exception = None
+ self.reraise_exception = False
+ return self.raised_exception
def test_benign_file(self):
with ArchiveMaker() as arc:
@@ -3540,6 +3617,80 @@ class TestExtractionFilters(unittest.TestCase):
with self.check_context(arc.open(), 'data'):
self.expect_file('parent/evil')
+ @symlink_test
+ @os_helper.skip_unless_symlink
+ def test_realpath_limit_attack(self):
+ # (CVE-2025-4517)
+
+ with ArchiveMaker() as arc:
+ # populate the symlinks and dirs that expand in os.path.realpath()
+ # The component length is chosen so that in common cases, the unexpanded
+ # path fits in PATH_MAX, but it overflows when the final symlink
+ # is expanded
+ steps = "abcdefghijklmnop"
+ if sys.platform == 'win32':
+ component = 'd' * 25
+ elif 'PC_PATH_MAX' in os.pathconf_names:
+ max_path_len = os.pathconf(self.outerdir.parent, "PC_PATH_MAX")
+ path_sep_len = 1
+ dest_len = len(str(self.destdir)) + path_sep_len
+ component_len = (max_path_len - dest_len) // (len(steps) + path_sep_len)
+ component = 'd' * component_len
+ else:
+ raise NotImplementedError("Need to guess component length for {sys.platform}")
+ path = ""
+ step_path = ""
+ for i in steps:
+ arc.add(os.path.join(path, component), type=tarfile.DIRTYPE,
+ mode='drwxrwxrwx')
+ arc.add(os.path.join(path, i), symlink_to=component)
+ path = os.path.join(path, component)
+ step_path = os.path.join(step_path, i)
+ # create the final symlink that exceeds PATH_MAX and simply points
+ # to the top dir.
+ # this link will never be expanded by
+ # os.path.realpath(strict=False), nor anything after it.
+ linkpath = os.path.join(*steps, "l"*254)
+ parent_segments = [".."] * len(steps)
+ arc.add(linkpath, symlink_to=os.path.join(*parent_segments))
+ # make a symlink outside to keep the tar command happy
+ arc.add("escape", symlink_to=os.path.join(linkpath, ".."))
+ # use the symlinks above, that are not checked, to create a hardlink
+ # to a file outside of the destination path
+ arc.add("flaglink", hardlink_to=os.path.join("escape", "flag"))
+ # now that we have the hardlink we can overwrite the file
+ arc.add("flaglink", content='overwrite')
+ # we can also create new files as well!
+ arc.add("escape/newfile", content='new')
+
+ with (self.subTest('fully_trusted'),
+ self.check_context(arc.open(), filter='fully_trusted',
+ check_flag=False)):
+ if sys.platform == 'win32':
+ self.expect_exception((FileNotFoundError, FileExistsError))
+ elif self.raised_exception:
+ # Cannot symlink/hardlink: tarfile falls back to getmember()
+ self.expect_exception(KeyError)
+ # Otherwise, this block should never enter.
+ else:
+ self.expect_any_tree(component)
+ self.expect_file('flaglink', content='overwrite')
+ self.expect_file('../newfile', content='new')
+ self.expect_file('escape', type=tarfile.SYMTYPE)
+ self.expect_file('a', symlink_to=component)
+
+ for filter in 'tar', 'data':
+ with self.subTest(filter), self.check_context(arc.open(), filter=filter):
+ exc = self.expect_exception((OSError, KeyError))
+ if isinstance(exc, OSError):
+ if sys.platform == 'win32':
+ # 3: ERROR_PATH_NOT_FOUND
+ # 5: ERROR_ACCESS_DENIED
+ # 206: ERROR_FILENAME_EXCED_RANGE
+ self.assertIn(exc.winerror, (3, 5, 206))
+ else:
+ self.assertEqual(exc.errno, errno.ENAMETOOLONG)
+
@symlink_test
def test_parent_symlink2(self):
# Test interplaying symlinks
@@ -3762,8 +3913,8 @@ class TestExtractionFilters(unittest.TestCase):
arc.add('symlink2', symlink_to=os.path.join(
'linkdir', 'hardlink2'))
arc.add('targetdir/target', size=3)
- arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
- arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
+ arc.add('linkdir/hardlink', hardlink_to=os.path.join('targetdir', 'target'))
+ arc.add('linkdir/hardlink2', hardlink_to=os.path.join('linkdir', 'symlink'))
for filter in 'tar', 'data', 'fully_trusted':
with self.check_context(arc.open(), filter):
@@ -3779,6 +3930,129 @@ class TestExtractionFilters(unittest.TestCase):
self.expect_file('linkdir/symlink', size=3)
self.expect_file('symlink2', size=3)
+ @symlink_test
+ def test_sneaky_hardlink_fallback(self):
+ # (CVE-2025-4330)
+ # Test that when hardlink extraction falls back to extracting members
+ # from the archive, the extracted member is (re-)filtered.
+ with ArchiveMaker() as arc:
+ # Create a directory structure so the c/escape symlink stays
+ # inside the path
+ arc.add("a/t/dummy")
+ # Create b/ directory
+ arc.add("b/")
+ # Point "c" to the bottom of the tree in "a"
+ arc.add("c", symlink_to=os.path.join("a", "t"))
+ # link to non-existant location under "a"
+ arc.add("c/escape", symlink_to=os.path.join("..", "..",
+ "link_here"))
+ # Move "c" to point to "b" ("c/escape" no longer exists)
+ arc.add("c", symlink_to="b")
+ # Attempt to create a hard link to "c/escape". Since it doesn't
+ # exist it will attempt to extract "cescape" but at "boom".
+ arc.add("boom", hardlink_to=os.path.join("c", "escape"))
+
+ with self.check_context(arc.open(), 'data'):
+ if not os_helper.can_symlink():
+ # When 'c/escape' is extracted, 'c' is a regular
+ # directory, and 'c/escape' *would* point outside
+ # the destination if symlinks were allowed.
+ self.expect_exception(
+ tarfile.LinkOutsideDestinationError)
+ elif sys.platform == "win32":
+ # On Windows, 'c/escape' points outside the destination
+ self.expect_exception(tarfile.LinkOutsideDestinationError)
+ else:
+ e = self.expect_exception(
+ tarfile.LinkFallbackError,
+ "link 'boom' would be extracted as a copy of "
+ + "'c/escape', which was rejected")
+ self.assertIsInstance(e.__cause__,
+ tarfile.LinkOutsideDestinationError)
+ for filter in 'tar', 'fully_trusted':
+ with self.subTest(filter), self.check_context(arc.open(), filter):
+ if not os_helper.can_symlink():
+ self.expect_file("a/t/dummy")
+ self.expect_file("b/")
+ self.expect_file("c/")
+ else:
+ self.expect_file("a/t/dummy")
+ self.expect_file("b/")
+ self.expect_file("a/t/escape", symlink_to='../../link_here')
+ self.expect_file("boom", symlink_to='../../link_here')
+ self.expect_file("c", symlink_to='b')
+
+ @symlink_test
+ def test_exfiltration_via_symlink(self):
+ # (CVE-2025-4138)
+ # Test changing symlinks that result in a symlink pointing outside
+ # the extraction directory, unless prevented by 'data' filter's
+ # normalization.
+ with ArchiveMaker() as arc:
+ arc.add("escape", symlink_to=os.path.join('link', 'link', '..', '..', 'link-here'))
+ arc.add("link", symlink_to='./')
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter):
+ if os_helper.can_symlink():
+ self.expect_file("link", symlink_to='./')
+ if filter == 'data':
+ self.expect_file("escape", symlink_to='link-here')
+ else:
+ self.expect_file("escape",
+ symlink_to='link/link/../../link-here')
+ else:
+ # Nothing is extracted.
+ pass
+
+ @symlink_test
+ def test_chmod_outside_dir(self):
+ # (CVE-2024-12718)
+ # Test that members used for delayed updates of directory metadata
+ # are (re-)filtered.
+ with ArchiveMaker() as arc:
+ # "pwn" is a veeeery innocent symlink:
+ arc.add("a/pwn", symlink_to='.')
+ # But now "pwn" is also a directory, so it's scheduled to have its
+ # metadata updated later:
+ arc.add("a/pwn/", mode='drwxrwxrwx')
+ # Oops, "pwn" is not so innocent any more:
+ arc.add("a/pwn", symlink_to='x/../')
+ # Newly created symlink points to the dest dir,
+ # so it's OK for the "data" filter.
+ arc.add('a/x', symlink_to=('../'))
+ # But now "pwn" points outside the dest dir
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter) as cc:
+ if not os_helper.can_symlink():
+ self.expect_file("a/pwn/")
+ elif filter == 'data':
+ self.expect_file("a/x", symlink_to='../')
+ self.expect_file("a/pwn", symlink_to='.')
+ else:
+ self.expect_file("a/x", symlink_to='../')
+ self.expect_file("a/pwn", symlink_to='x/../')
+ if sys.platform != "win32":
+ st_mode = cc.outerdir.stat().st_mode
+ self.assertNotEqual(st_mode & 0o777, 0o777)
+
+ def test_link_fallback_normalizes(self):
+ # Make sure hardlink fallbacks work for non-normalized paths for all
+ # filters
+ with ArchiveMaker() as arc:
+ arc.add("dir/")
+ arc.add("dir/../afile")
+ arc.add("link1", hardlink_to='dir/../afile')
+ arc.add("link2", hardlink_to='dir/../dir/../afile')
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter) as cc:
+ self.expect_file("dir/")
+ self.expect_file("afile")
+ self.expect_file("link1")
+ self.expect_file("link2")
+
def test_modes(self):
# Test how file modes are extracted
# (Note that the modes are ignored on platforms without working chmod)
@@ -3892,7 +4166,7 @@ class TestExtractionFilters(unittest.TestCase):
# The 'tar' filter returns TarInfo objects with the same name/type.
# (It can also fail for particularly "evil" input, but we don't have
# that in the test archive.)
- with tarfile.TarFile.open(tarname) as tar:
+ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar:
for tarinfo in tar.getmembers():
filtered = tarfile.tar_filter(tarinfo, '')
self.assertIs(filtered.name, tarinfo.name)
@@ -3901,7 +4175,7 @@ class TestExtractionFilters(unittest.TestCase):
def test_data_filter(self):
# The 'data' filter either raises, or returns TarInfo with the same
# name/type.
- with tarfile.TarFile.open(tarname) as tar:
+ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar:
for tarinfo in tar.getmembers():
try:
filtered = tarfile.data_filter(tarinfo, '')
@@ -4031,13 +4305,13 @@ class TestExtractionFilters(unittest.TestCase):
# If errorlevel is 0, errors affected by errorlevel are ignored
with self.check_context(arc.open(errorlevel=0), extracterror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), filtererror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), oserror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), tarerror_filter):
self.expect_exception(tarfile.TarError)
@@ -4048,7 +4322,7 @@ class TestExtractionFilters(unittest.TestCase):
# If 1, all fatal errors are raised
with self.check_context(arc.open(errorlevel=1), extracterror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=1), filtererror_filter):
self.expect_exception(tarfile.FilterError)
diff --git a/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst
new file mode 100644
index 0000000..e3f984a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst
@@ -0,0 +1,6 @@
+Fixes multiple issues that allowed ``tarfile`` extraction filters
+(``filter="data"`` and ``filter="tar"``) to be bypassed using crafted
+symlinks and hard links.
+
+Addresses CVE 2024-12718, CVE 2025-4138, CVE 2025-4330, and CVE 2025-4517.
+
--
2.49.0