From 8b70605b594b3831331a9340ba764ff751871612 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 6 Mar 2023 17:24:24 +0100
Subject: [PATCH 2/2] CVE-2007-4559, PEP-706: Add filters for tarfile
 extraction (downstream)

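Add and test RHEL-specific ways of configuring the default extraction filter:
the PYTHON_TARFILE_EXTRACTION_FILTER environment variable and the
/etc/python/tarfile.cfg config file. The environment variable takes precedence.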
---
 Lib/tarfile.py           |  42 +++++++++++++
 Lib/test/test_shutil.py  |   3 +-
 Lib/test/test_tarfile.py | 128 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index b6ad7dbe2a4..dc7050b2c63 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -72,6 +72,13 @@ __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
            "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
            "DEFAULT_FORMAT", "open"]
 
+# If true, use the safer (but backwards-incompatible) 'tar' extraction filter,
+# rather than 'fully_trusted', by default.
+# The emitted warning is changed to match.
+_RH_SAFER_DEFAULT = True
+
+# System-wide configuration file
+_CONFIG_FILENAME = '/etc/python/tarfile.cfg'
 
 #---------------------------------------------------------
 # tar constants
@@ -2197,6 +2204,41 @@ class TarFile(object):
         if filter is None:
             filter = self.extraction_filter
             if filter is None:
+                name = os.environ.get('PYTHON_TARFILE_EXTRACTION_FILTER')
+                if name is None:  # unset: consult the config file
+                    try:
+                        file = bltn_open(_CONFIG_FILENAME)
+                    except FileNotFoundError:
+                        pass  # no config file: use the default
+                    else:
+                        import configparser
+                        conf = configparser.ConfigParser(
+                            interpolation=None,
+                            comment_prefixes=('#', ),
+                        )
+                        with file:
+                            conf.read_file(file)
+                        name = conf.get('tarfile',
+                                        'PYTHON_TARFILE_EXTRACTION_FILTER',
+                                        fallback='')
+                if name:  # empty value means "use the default"
+                    try:
+                        filter = _NAMED_FILTERS[name]
+                    except KeyError:
+                        raise ValueError(f"filter {name!r} not found") from None
+                    self.extraction_filter = filter  # cache the choice
+                    return filter
+                if _RH_SAFER_DEFAULT:
+                    warnings.warn(
+                        'The default behavior of tarfile extraction has been '
+                        + 'changed to disallow common exploits '
+                        + '(including CVE-2007-4559). '
+                        + 'By default, absolute/parent paths are disallowed '
+                        + 'and some mode bits are cleared. '
+                        + 'See https://access.redhat.com/articles/7004769 '
+                        + 'for more details.',
+                        RuntimeWarning)
+                    return tar_filter
                 return fully_trusted_filter
             if isinstance(filter, str):
                 raise TypeError(
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 9041e7aa368..1eb1116cc10 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -1613,7 +1613,8 @@ class TestArchives(BaseTest, unittest.TestCase):
     def check_unpack_tarball(self, format):
         self.check_unpack_archive(format, filter='fully_trusted')
         self.check_unpack_archive(format, filter='data')
-        with warnings_helper.check_no_warnings(self):
+        with warnings_helper.check_warnings(
+                ('.*CVE-2007-4559', RuntimeWarning)):
             self.check_unpack_archive(format)
 
     def test_unpack_archive_tar(self):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index a66f7efd2d6..6fd3c384b5c 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2,7 +2,7 @@ import sys
 import os
 import io
 from hashlib import sha256
-from contextlib import contextmanager
+from contextlib import contextmanager, ExitStack
 from random import Random
 import pathlib
 import shutil
@@ -2929,7 +2929,11 @@ class NoneInfoExtractTests(ReadTest):
         tar = tarfile.open(tarname, mode='r', encoding="iso8859-1")
         cls.control_dir = pathlib.Path(TEMPDIR) / "extractall_ctrl"
         tar.errorlevel = 0
-        tar.extractall(cls.control_dir, filter=cls.extraction_filter)
+        with ExitStack() as cm:
+            if cls.extraction_filter is None:
+                cm.enter_context(warnings.catch_warnings())
+                warnings.simplefilter(action="ignore", category=RuntimeWarning)
+            tar.extractall(cls.control_dir, filter=cls.extraction_filter)
         tar.close()
         cls.control_paths = set(
             p.relative_to(cls.control_dir)
@@ -3592,7 +3596,8 @@ class TestExtractionFilters(unittest.TestCase):
         """Ensure the default filter does not warn (like in 3.12)"""
         with ArchiveMaker() as arc:
             arc.add('foo')
-        with warnings_helper.check_no_warnings(self):
+        with warnings_helper.check_warnings(
+                ('.*CVE-2007-4559', RuntimeWarning)):
             with self.check_context(arc.open(), None):
                 self.expect_file('foo')
 
@@ -3762,6 +3767,123 @@ class TestExtractionFilters(unittest.TestCase):
             self.expect_exception(TypeError)  # errorlevel is not int
 
 
+    @contextmanager
+    def rh_config_context(self, config_lines=None):
+        """Context manager for testing ways of overriding the default filter
+
+        Yield a triple with:
+        - temporary directory (removed again when the context exits)
+        - EnvironmentVarGuard()
+        - the opened test archive, for use with check_* methods below
+
+        If config_lines is given, write them to the config file. Otherwise
+        the config file is missing.
+        """
+        tempdir = pathlib.Path(TEMPDIR) / 'tmp'
+        configfile = tempdir / 'tarfile.cfg'
+        with ArchiveMaker() as arc:
+            arc.add('good')
+            arc.add('ugly', symlink_to='/etc/passwd')
+            arc.add('../bad')
+        with (
+                support.temp_dir(tempdir),
+                support.swap_attr(tarfile, '_CONFIG_FILENAME', str(configfile)),
+                support.EnvironmentVarGuard() as env,
+                arc.open() as tar,
+        ):
+            if config_lines is not None:
+                with configfile.open('w') as f:
+                    for line in config_lines:
+                        print(line, file=f)
+            yield tempdir, env, tar
+
+    def check_rh_default_behavior(self, tar, tempdir):
+        """Check RH default: warn and refuse to extract dangerous files."""
+        with (
+                warnings_helper.check_warnings(
+                    ('.*CVE-2007-4559', RuntimeWarning)),
+                self.assertRaises(tarfile.OutsideDestinationError),
+        ):
+            tar.extractall(tempdir / 'outdir')
+
+    def check_trusted_default(self, tar, tempdir):
+        """Check 'fully_trusted' is configured as the default filter."""
+        with (
+                warnings_helper.check_no_warnings(self),
+        ):
+            tar.extractall(tempdir / 'outdir')
+            self.assertTrue((tempdir / 'outdir/good').exists())
+            self.assertEqual((tempdir / 'outdir/ugly').readlink(),
+                             pathlib.Path('/etc/passwd'))
+            self.assertTrue((tempdir / 'bad').exists())
+
+    def test_rh_default_no_conf(self):
+        with self.rh_config_context() as (tempdir, env, tar):
+            self.check_rh_default_behavior(tar, tempdir)
+
+    def test_rh_default_from_file(self):
+        lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=fully_trusted']
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            self.check_trusted_default(tar, tempdir)
+
+    def test_rh_empty_config_file(self):
+        """Empty config file -> default behavior"""
+        lines = []
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            self.check_rh_default_behavior(tar, tempdir)
+
+    def test_empty_config_section(self):
+        """Empty section in config file -> default behavior"""
+        lines = ['[tarfile]']
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            self.check_rh_default_behavior(tar, tempdir)
+
+    def test_rh_default_empty_config_option(self):
+        """Empty option value in config file -> default behavior"""
+        lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=']
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            self.check_rh_default_behavior(tar, tempdir)
+
+    def test_bad_config_option(self):
+        """Bad option value in config file -> ValueError"""
+        lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=unknown!']
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            with self.assertRaises(ValueError):
+                tar.extractall(tempdir / 'outdir')
+
+    def test_default_from_envvar(self):
+        with self.rh_config_context() as (tempdir, env, tar):
+            env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'fully_trusted'
+            self.check_trusted_default(tar, tempdir)
+
+    def test_empty_envvar(self):
+        """Empty env variable -> default behavior"""
+        with self.rh_config_context() as (tempdir, env, tar):
+            env['PYTHON_TARFILE_EXTRACTION_FILTER'] = ''
+            self.check_rh_default_behavior(tar, tempdir)
+
+    def test_bad_envvar(self):
+        with self.rh_config_context() as (tempdir, env, tar):
+            env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'unknown!'
+            with self.assertRaises(ValueError):
+                tar.extractall(tempdir / 'outdir')
+
+    def test_envvar_overrides_file(self):
+        lines = ['[tarfile]', 'PYTHON_TARFILE_EXTRACTION_FILTER=data']
+        with self.rh_config_context(lines) as (tempdir, env, tar):
+            env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'fully_trusted'
+            self.check_trusted_default(tar, tempdir)
+
+    def test_monkeypatch_overrides_envvar(self):
+        with self.rh_config_context(None) as (tempdir, env, tar):
+            env['PYTHON_TARFILE_EXTRACTION_FILTER'] = 'data'
+            with support.swap_attr(
+                    tarfile.TarFile, 'extraction_filter',
+                    staticmethod(tarfile.fully_trusted_filter)
+            ):
+                self.check_trusted_default(tar, tempdir)
+
+
 def setUpModule():
     support.unlink(TEMPDIR)
     os.makedirs(TEMPDIR)
-- 
2.40.1