cloud-init/SOURCES/ci-write-passwords-only-to-...

From 769b9f8c9b1ecc294a197575108ae7cb54ad7f4b Mon Sep 17 00:00:00 2001
From: Eduardo Otubo <otubo@redhat.com>
Date: Mon, 5 Jul 2021 14:13:45 +0200
Subject: [PATCH] write passwords only to serial console, lock down
 cloud-init-output.log (#847)

RH-Author: Eduardo Otubo <otubo@redhat.com>
RH-MergeRequest: 21: write passwords only to serial console, lock down cloud-init-output.log (#847)
RH-Commit: [1/1] 8f30f2b7d0d6f9dca19994dbd0827b44e998f238 (otubo/cloud-init)
RH-Bugzilla: 1945891
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Mohamed Gamal Morsy <mmorsy@redhat.com>

commit b794d426b9ab43ea9d6371477466070d86e10668
Author: Daniel Watkins <oddbloke@ubuntu.com>
Date:   Fri Mar 19 10:06:42 2021 -0400

    write passwords only to serial console, lock down cloud-init-output.log (#847)

    Prior to this commit, when a user specified configuration which would
    generate random passwords for users, cloud-init would cause those
    passwords to be written to the serial console by emitting them on
    stderr.  In the default configuration, any stdout or stderr emitted by
    cloud-init is also written to `/var/log/cloud-init-output.log`.  This
    file is world-readable, meaning that those randomly-generated passwords
    were available to be read by any user with access to the system.  This
    presents an obvious security issue.

    This commit responds to this issue in two ways:

    * We address the direct issue by moving from writing the passwords to
      sys.stderr to writing them directly to /dev/console (via
      util.multi_log); this means that the passwords will never end up in
      cloud-init-output.log
    * To avoid future issues like this, we also modify the logging code so
      that any files created in a log sink subprocess will only be
      owner/group readable and, if it exists, will be owned by the adm
      group.  This results in `/var/log/cloud-init-output.log` no longer
      being world-readable, meaning that if there are other parts of the
      codebase that are emitting sensitive data intended for the serial
      console, that data is no longer available to all users of the system.

    LP: #1918303

Signed-off-by: Eduardo Otubo <otubo@redhat.com>
---
 cloudinit/config/cc_set_passwords.py          |  5 +-
 cloudinit/config/tests/test_set_passwords.py  | 40 +++++++++----
 cloudinit/tests/test_util.py                  | 56 +++++++++++++++++++
 cloudinit/util.py                             | 38 +++++++++++--
 .../modules/test_set_password.py              | 24 ++++++++
 tests/integration_tests/test_logging.py       | 22 ++++++++
 tests/unittests/test_util.py                  |  4 ++
 7 files changed, 173 insertions(+), 16 deletions(-)
 create mode 100644 tests/integration_tests/test_logging.py

diff --git a/cloudinit/config/cc_set_passwords.py b/cloudinit/config/cc_set_passwords.py
index d6b5682d..433de751 100755
--- a/cloudinit/config/cc_set_passwords.py
+++ b/cloudinit/config/cc_set_passwords.py
@@ -78,7 +78,6 @@ password.
 """

 import re
-import sys

 from cloudinit.distros import ug_util
 from cloudinit import log as logging
@@ -214,7 +213,9 @@ def handle(_name, cfg, cloud, log, args):
         if len(randlist):
             blurb = ("Set the following 'random' passwords\n",
                      '\n'.join(randlist))
-            sys.stderr.write("%s\n%s\n" % blurb)
+            util.multi_log(
+                "%s\n%s\n" % blurb, stderr=False, fallback_to_stdout=False
+            )

         if expire:
             expired_users = []
diff --git a/cloudinit/config/tests/test_set_passwords.py b/cloudinit/config/tests/test_set_passwords.py
index daa1ef51..bbe2ee8f 100644
--- a/cloudinit/config/tests/test_set_passwords.py
+++ b/cloudinit/config/tests/test_set_passwords.py
@@ -74,10 +74,6 @@ class TestSetPasswordsHandle(CiTestCase):

     with_logs = True

-    def setUp(self):
-        super(TestSetPasswordsHandle, self).setUp()
-        self.add_patch('cloudinit.config.cc_set_passwords.sys.stderr', 'm_err')
-
     def test_handle_on_empty_config(self, *args):
         """handle logs that no password has changed when config is empty."""
         cloud = self.tmp_cloud(distro='ubuntu')
@@ -129,10 +125,12 @@ class TestSetPasswordsHandle(CiTestCase):
             mock.call(['pw', 'usermod', 'ubuntu', '-p', '01-Jan-1970'])],
             m_subp.call_args_list)

+    @mock.patch(MODPATH + "util.multi_log")
     @mock.patch(MODPATH + "util.is_BSD")
     @mock.patch(MODPATH + "subp.subp")
-    def test_handle_on_chpasswd_list_creates_random_passwords(self, m_subp,
-                                                              m_is_bsd):
+    def test_handle_on_chpasswd_list_creates_random_passwords(
+        self, m_subp, m_is_bsd, m_multi_log
+    ):
         """handle parses command set random passwords."""
         m_is_bsd.return_value = False
         cloud = self.tmp_cloud(distro='ubuntu')
@@ -146,10 +144,32 @@ class TestSetPasswordsHandle(CiTestCase):
         self.assertIn(
             'DEBUG: Handling input for chpasswd as list.',
             self.logs.getvalue())
-        self.assertNotEqual(
-            [mock.call(['chpasswd'],
-             '\n'.join(valid_random_pwds) + '\n')],
-            m_subp.call_args_list)
+
+        self.assertEqual(1, m_subp.call_count)
+        args, _kwargs = m_subp.call_args
+        self.assertEqual(["chpasswd"], args[0])
+
+        stdin = args[1]
+        user_pass = {
+            user: password
+            for user, password
+            in (line.split(":") for line in stdin.splitlines())
+        }
+
+        self.assertEqual(1, m_multi_log.call_count)
+        self.assertEqual(
+            mock.call(mock.ANY, stderr=False, fallback_to_stdout=False),
+            m_multi_log.call_args
+        )
+
+        self.assertEqual(set(["root", "ubuntu"]), set(user_pass.keys()))
+        written_lines = m_multi_log.call_args[0][0].splitlines()
+        for password in user_pass.values():
+            for line in written_lines:
+                if password in line:
+                    break
+            else:
+                self.fail("Password not emitted to console")


 # vi: ts=4 expandtab
diff --git a/cloudinit/tests/test_util.py b/cloudinit/tests/test_util.py
index b7a302f1..e811917e 100644
--- a/cloudinit/tests/test_util.py
+++ b/cloudinit/tests/test_util.py
@@ -851,4 +851,60 @@ class TestEnsureFile:
         assert "ab" == kwargs["omode"]


+@mock.patch("cloudinit.util.grp.getgrnam")
+@mock.patch("cloudinit.util.os.setgid")
+@mock.patch("cloudinit.util.os.umask")
+class TestRedirectOutputPreexecFn:
+    """This tests specifically the preexec_fn used in redirect_output."""
+
+    @pytest.fixture(params=["outfmt", "errfmt"])
+    def preexec_fn(self, request):
+        """A fixture to gather the preexec_fn used by redirect_output.
+
+        This enables simpler direct testing of it, and parameterises any tests
+        using it to cover both the stdout and stderr code paths.
+        """
+        test_string = "| piped output to invoke subprocess"
+        if request.param == "outfmt":
+            args = (test_string, None)
+        elif request.param == "errfmt":
+            args = (None, test_string)
+        with mock.patch("cloudinit.util.subprocess.Popen") as m_popen:
+            util.redirect_output(*args)
+
+        assert 1 == m_popen.call_count
+        _args, kwargs = m_popen.call_args
+        assert "preexec_fn" in kwargs, "preexec_fn not passed to Popen"
+        return kwargs["preexec_fn"]
+
+    def test_preexec_fn_sets_umask(
+        self, m_os_umask, _m_setgid, _m_getgrnam, preexec_fn
+    ):
+        """preexec_fn should set a mask that avoids world-readable files."""
+        preexec_fn()
+
+        assert [mock.call(0o037)] == m_os_umask.call_args_list
+
+    def test_preexec_fn_sets_group_id_if_adm_group_present(
+        self, _m_os_umask, m_setgid, m_getgrnam, preexec_fn
+    ):
+        """We should setgrp to adm if present, so files are owned by them."""
+        fake_group = mock.Mock(gr_gid=mock.sentinel.gr_gid)
+        m_getgrnam.return_value = fake_group
+
+        preexec_fn()
+
+        assert [mock.call("adm")] == m_getgrnam.call_args_list
+        assert [mock.call(mock.sentinel.gr_gid)] == m_setgid.call_args_list
+
+    def test_preexec_fn_handles_absent_adm_group_gracefully(
+        self, _m_os_umask, m_setgid, m_getgrnam, preexec_fn
+    ):
+        """We should handle an absent adm group gracefully."""
+        m_getgrnam.side_effect = KeyError("getgrnam(): name not found: 'adm'")
+
+        preexec_fn()
+
+        assert 0 == m_setgid.call_count
+
 # vi: ts=4 expandtab
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 769f3425..4e0a72db 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -359,7 +359,7 @@ def find_modules(root_dir):


 def multi_log(text, console=True, stderr=True,
-              log=None, log_level=logging.DEBUG):
+              log=None, log_level=logging.DEBUG, fallback_to_stdout=True):
     if stderr:
         sys.stderr.write(text)
     if console:
@@ -368,7 +368,7 @@ def multi_log(text, console=True, stderr=True,
             with open(conpath, 'w') as wfh:
                 wfh.write(text)
                 wfh.flush()
-        else:
+        elif fallback_to_stdout:
             # A container may lack /dev/console (arguably a container bug).  If
             # it does not exist, then write output to stdout.  this will result
             # in duplicate stderr and stdout messages if stderr was True.
@@ -623,6 +623,26 @@ def redirect_output(outfmt, errfmt, o_out=None, o_err=None):
     if not o_err:
         o_err = sys.stderr

+    # pylint: disable=subprocess-popen-preexec-fn
+    def set_subprocess_umask_and_gid():
+        """Reconfigure umask and group ID to create output files securely.
+
+        This is passed to subprocess.Popen as preexec_fn, so it is executed in
+        the context of the newly-created process.  It:
+
+        * sets the umask of the process so created files aren't world-readable
+        * if an adm group exists in the system, sets that as the process' GID
+          (so that the created file(s) are owned by root:adm)
+        """
+        os.umask(0o037)
+        try:
+            group_id = grp.getgrnam("adm").gr_gid
+        except KeyError:
+            # No adm group, don't set a group
+            pass
+        else:
+            os.setgid(group_id)
+
     if outfmt:
         LOG.debug("Redirecting %s to %s", o_out, outfmt)
         (mode, arg) = outfmt.split(" ", 1)
@@ -632,7 +652,12 @@ def redirect_output(outfmt, errfmt, o_out=None, o_err=None):
                 owith = "wb"
             new_fp = open(arg, owith)
         elif mode == "|":
-            proc = subprocess.Popen(arg, shell=True, stdin=subprocess.PIPE)
+            proc = subprocess.Popen(
+                arg,
+                shell=True,
+                stdin=subprocess.PIPE,
+                preexec_fn=set_subprocess_umask_and_gid,
+            )
             new_fp = proc.stdin
         else:
             raise TypeError("Invalid type for output format: %s" % outfmt)
@@ -654,7 +679,12 @@ def redirect_output(outfmt, errfmt, o_out=None, o_err=None):
                 owith = "wb"
             new_fp = open(arg, owith)
         elif mode == "|":
-            proc = subprocess.Popen(arg, shell=True, stdin=subprocess.PIPE)
+            proc = subprocess.Popen(
+                arg,
+                shell=True,
+                stdin=subprocess.PIPE,
+                preexec_fn=set_subprocess_umask_and_gid,
+            )
             new_fp = proc.stdin
         else:
             raise TypeError("Invalid type for error format: %s" % errfmt)
diff --git a/tests/integration_tests/modules/test_set_password.py b/tests/integration_tests/modules/test_set_password.py
index b13f76fb..d7cf91a5 100644
--- a/tests/integration_tests/modules/test_set_password.py
+++ b/tests/integration_tests/modules/test_set_password.py
@@ -116,6 +116,30 @@ class Mixin:
         # Which are not the same
         assert shadow_users["harry"] != shadow_users["dick"]

+    def test_random_passwords_not_stored_in_cloud_init_output_log(
+        self, class_client
+    ):
+        """We should not emit passwords to the in-instance log file.
+
+        LP: #1918303
+        """
+        cloud_init_output = class_client.read_from_file(
+            "/var/log/cloud-init-output.log"
+        )
+        assert "dick:" not in cloud_init_output
+        assert "harry:" not in cloud_init_output
+
+    def test_random_passwords_emitted_to_serial_console(self, class_client):
+        """We should emit passwords to the serial console. (LP: #1918303)"""
+        try:
+            console_log = class_client.instance.console_log()
+        except NotImplementedError:
+            # Assume that an exception here means that we can't use the console
+            # log
+            pytest.skip("NotImplementedError when requesting console log")
+        assert "dick:" in console_log
+        assert "harry:" in console_log
+
     def test_explicit_password_set_correctly(self, class_client):
         """Test that an explicitly-specified password is set correctly."""
         shadow_users, _ = self._fetch_and_parse_etc_shadow(class_client)
diff --git a/tests/integration_tests/test_logging.py b/tests/integration_tests/test_logging.py
new file mode 100644
index 00000000..b31a0434
--- /dev/null
+++ b/tests/integration_tests/test_logging.py
@@ -0,0 +1,22 @@
+"""Integration tests relating to cloud-init's logging."""
+
+
+class TestVarLogCloudInitOutput:
+    """Integration tests relating to /var/log/cloud-init-output.log."""
+
+    def test_var_log_cloud_init_output_not_world_readable(self, client):
+        """
+        The log can contain sensitive data, it shouldn't be world-readable.
+
+        LP: #1918303
+        """
+        # Check the file exists
+        assert client.execute("test -f /var/log/cloud-init-output.log").ok
+
+        # Check its permissions are as we expect
+        perms, user, group = client.execute(
+            "stat -c %a:%U:%G /var/log/cloud-init-output.log"
+        ).split(":")
+        assert "640" == perms
+        assert "root" == user
+        assert "adm" == group
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 857629f1..e5292001 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -572,6 +572,10 @@ class TestMultiLog(helpers.FilesystemMockingTestCase):
         util.multi_log(logged_string)
         self.assertEqual(logged_string, self.stdout.getvalue())

+    def test_logs_dont_go_to_stdout_if_fallback_to_stdout_is_false(self):
+        util.multi_log('something', fallback_to_stdout=False)
+        self.assertEqual('', self.stdout.getvalue())
+
     def test_logs_go_to_log_if_given(self):
         log = mock.MagicMock()
         logged_string = 'something very important'
--
2.27.0