cloud-init/SOURCES/ci-Make-user-vendor-data-sensitive-and-remove-log-permi.patch
2023-09-27 12:45:57 +00:00

310 lines
12 KiB
Diff

From dd1a79fc5c0b5f486ca2e66ed3a45c8f4f7b1f15 Mon Sep 17 00:00:00 2001
From: James Falcon <james.falcon@canonical.com>
Date: Wed, 26 Apr 2023 15:11:55 -0500
Subject: [PATCH 2/2] Make user/vendor data sensitive and remove log
permissions (#2144)
RH-Author: Ani Sinha <None>
RH-MergeRequest: 99: Make user/vendor data sensitive and remove log permissions (#2144)
RH-Bugzilla: 2190081
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [1/1] 1b34e2c9c61a90abb88f2df87d41f96b54e79ff7 (anisinha/rhel-cloud-init)
Because user data and vendor data may contain sensitive information,
this commit ensures that any user data or vendor data written to
instance-data.json gets redacted and is only available to root user.
Also, modify the permissions of cloud-init.log to be 640, so that
sensitive data leaked to the log isn't world readable.
Additionally, remove the logging of user data and vendor data to
cloud-init.log from the Vultr datasource.
Conflicts:
cloudinit/sources/DataSourceVultr.py
- editor directives missing in file on upstream version.
LP: #2013967
CVE: CVE-2023-1786
(cherry picked from commit a378b7e4f47375458651c0972e7cd813f6fe0a6b)
Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
cloudinit/sources/DataSourceLXD.py | 9 ++++++---
cloudinit/sources/DataSourceVultr.py | 14 ++++++--------
cloudinit/sources/__init__.py | 28 +++++++++++++++++++++++++---
cloudinit/stages.py | 4 +++-
tests/unittests/sources/test_init.py | 27 ++++++++++++++++++++++++++-
tests/unittests/test_stages.py | 18 +++++++++++-------
6 files changed, 77 insertions(+), 23 deletions(-)
diff --git a/cloudinit/sources/DataSourceLXD.py b/cloudinit/sources/DataSourceLXD.py
index ab440cc8..e4cae91a 100644
--- a/cloudinit/sources/DataSourceLXD.py
+++ b/cloudinit/sources/DataSourceLXD.py
@@ -14,7 +14,7 @@ import stat
import time
from enum import Flag, auto
from json.decoder import JSONDecodeError
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
import requests
from requests.adapters import HTTPAdapter
@@ -168,11 +168,14 @@ class DataSourceLXD(sources.DataSource):
_network_config: Union[Dict, str] = sources.UNSET
_crawled_metadata: Union[Dict, str] = sources.UNSET
- sensitive_metadata_keys = (
- "merged_cfg",
+ sensitive_metadata_keys: Tuple[
+ str, ...
+ ] = sources.DataSource.sensitive_metadata_keys + (
"user.meta-data",
"user.vendor-data",
"user.user-data",
+ "cloud-init.user-data",
+ "cloud-init.vendor-data",
)
skip_hotplug_detect = True
diff --git a/cloudinit/sources/DataSourceVultr.py b/cloudinit/sources/DataSourceVultr.py
index 9d7c84fb..660e9f14 100644
--- a/cloudinit/sources/DataSourceVultr.py
+++ b/cloudinit/sources/DataSourceVultr.py
@@ -5,6 +5,8 @@
# Vultr Metadata API:
# https://www.vultr.com/metadata/
+from typing import Tuple
+
import cloudinit.sources.helpers.vultr as vultr
from cloudinit import log as log
from cloudinit import sources, util, version
@@ -28,6 +30,10 @@ class DataSourceVultr(sources.DataSource):
dsname = "Vultr"
+ sensitive_metadata_keys: Tuple[
+ str, ...
+ ] = sources.DataSource.sensitive_metadata_keys + ("startup-script",)
+
def __init__(self, sys_cfg, distro, paths):
super(DataSourceVultr, self).__init__(sys_cfg, distro, paths)
self.ds_cfg = util.mergemanydict(
@@ -54,13 +60,8 @@ class DataSourceVultr(sources.DataSource):
self.get_datasource_data(self.metadata)
# Dump some data so diagnosing failures is manageable
- LOG.debug("Vultr Vendor Config:")
- LOG.debug(util.json_dumps(self.metadata["vendor-data"]))
LOG.debug("SUBID: %s", self.metadata["instance-id"])
LOG.debug("Hostname: %s", self.metadata["local-hostname"])
- if self.userdata_raw is not None:
- LOG.debug("User-Data:")
- LOG.debug(self.userdata_raw)
return True
@@ -146,7 +147,4 @@ if __name__ == "__main__":
config = md["vendor-data"]
sysinfo = vultr.get_sysinfo()
- print(util.json_dumps(sysinfo))
- print(util.json_dumps(config))
-
# vi: ts=4 expandtab
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 565e1754..5c6ae8b1 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -110,7 +110,10 @@ def process_instance_metadata(metadata, key_path="", sensitive_keys=()):
sub_key_path = key_path + "/" + key
else:
sub_key_path = key
- if key in sensitive_keys or sub_key_path in sensitive_keys:
+ if (
+ key.lower() in sensitive_keys
+ or sub_key_path.lower() in sensitive_keys
+ ):
sens_keys.append(sub_key_path)
if isinstance(val, str) and val.startswith("ci-b64:"):
base64_encoded_keys.append(sub_key_path)
@@ -132,6 +135,12 @@ def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
Replace any keys values listed in 'sensitive_keys' with redact_value.
"""
+ # While 'sensitive_keys' should already sanitized to only include what
+ # is in metadata, it is possible keys will overlap. For example, if
+ # "merged_cfg" and "merged_cfg/ds/userdata" both match, it's possible that
+ # "merged_cfg" will get replaced first, meaning "merged_cfg/ds/userdata"
+ # no longer represents a valid key.
+ # Thus, we still need to do membership checks in this function.
if not metadata.get("sensitive_keys", []):
return metadata
md_copy = copy.deepcopy(metadata)
@@ -139,9 +148,14 @@ def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
path_parts = key_path.split("/")
obj = md_copy
for path in path_parts:
- if isinstance(obj[path], dict) and path != path_parts[-1]:
+ if (
+ path in obj
+ and isinstance(obj[path], dict)
+ and path != path_parts[-1]
+ ):
obj = obj[path]
- obj[path] = redact_value
+ if path in obj:
+ obj[path] = redact_value
return md_copy
@@ -249,6 +263,14 @@ class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta):
sensitive_metadata_keys: Tuple[str, ...] = (
"merged_cfg",
"security-credentials",
+ "userdata",
+ "user-data",
+ "user_data",
+ "vendordata",
+ "vendor-data",
+ # Provide ds/vendor_data to avoid redacting top-level
+ # "vendor_data": {enabled: True}
+ "ds/vendor_data",
)
# True on datasources that may not see hotplugged devices reflected
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index a624a6fb..1326d205 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -204,7 +204,9 @@ class Init:
log_file = util.get_cfg_option_str(self.cfg, "def_log_file")
log_file_mode = util.get_cfg_option_int(self.cfg, "def_log_file_mode")
if log_file:
- util.ensure_file(log_file, mode=0o640, preserve_mode=True)
+ # At this point the log file should have already been created
+ # in the setupLogging function of log.py
+ util.ensure_file(log_file, mode=0o640, preserve_mode=False)
perms = self.cfg.get("syslog_fix_perms")
if not perms:
perms = {}
diff --git a/tests/unittests/sources/test_init.py b/tests/unittests/sources/test_init.py
index 0447e02c..eb27198f 100644
--- a/tests/unittests/sources/test_init.py
+++ b/tests/unittests/sources/test_init.py
@@ -458,12 +458,24 @@ class TestDataSource(CiTestCase):
"cred2": "othersekret",
}
},
+ "someother": {
+ "nested": {
+ "userData": "HIDE ME",
+ }
+ },
+ "VENDOR-DAta": "HIDE ME TOO",
},
)
self.assertCountEqual(
(
"merged_cfg",
"security-credentials",
+ "userdata",
+ "user-data",
+ "user_data",
+ "vendordata",
+ "vendor-data",
+ "ds/vendor_data",
),
datasource.sensitive_metadata_keys,
)
@@ -490,7 +502,9 @@ class TestDataSource(CiTestCase):
"base64_encoded_keys": [],
"merged_cfg": REDACT_SENSITIVE_VALUE,
"sensitive_keys": [
+ "ds/meta_data/VENDOR-DAta",
"ds/meta_data/some/security-credentials",
+ "ds/meta_data/someother/nested/userData",
"merged_cfg",
],
"sys_info": sys_info,
@@ -500,6 +514,7 @@ class TestDataSource(CiTestCase):
"availability_zone": "myaz",
"cloud-name": "subclasscloudname",
"cloud_name": "subclasscloudname",
+ "cloud_id": "subclasscloudname",
"distro": "ubuntu",
"distro_release": "focal",
"distro_version": "20.04",
@@ -522,14 +537,18 @@ class TestDataSource(CiTestCase):
"ds": {
"_doc": EXPERIMENTAL_TEXT,
"meta_data": {
+ "VENDOR-DAta": REDACT_SENSITIVE_VALUE,
"availability_zone": "myaz",
"local-hostname": "test-subclass-hostname",
"region": "myregion",
"some": {"security-credentials": REDACT_SENSITIVE_VALUE},
+ "someother": {
+ "nested": {"userData": REDACT_SENSITIVE_VALUE}
+ },
},
},
}
- self.assertCountEqual(expected, redacted)
+ self.assertEqual(expected, redacted)
file_stat = os.stat(json_file)
self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode))
@@ -574,6 +593,12 @@ class TestDataSource(CiTestCase):
(
"merged_cfg",
"security-credentials",
+ "userdata",
+ "user-data",
+ "user_data",
+ "vendordata",
+ "vendor-data",
+ "ds/vendor_data",
),
datasource.sensitive_metadata_keys,
)
diff --git a/tests/unittests/test_stages.py b/tests/unittests/test_stages.py
index 15a7e973..a61f9df9 100644
--- a/tests/unittests/test_stages.py
+++ b/tests/unittests/test_stages.py
@@ -606,19 +606,23 @@ class TestInit_InitializeFilesystem:
# Assert we create it 0o640 by default if it doesn't already exist
assert 0o640 == stat.S_IMODE(log_file.stat().mode)
- def test_existing_file_permissions_are_not_modified(self, init, tmpdir):
- """If the log file already exists, we should not modify its permissions
+ def test_existing_file_permissions(self, init, tmpdir):
+ """Test file permissions are set as expected.
+
+ CIS Hardening requires 640 permissions. These permissions are
+ currently hardcoded on every boot, but if there's ever a reason
+ to change this, we need to then ensure that they
+ are *not* set every boot.
See https://bugs.launchpad.net/cloud-init/+bug/1900837.
"""
- # Use a mode that will never be made the default so this test will
- # always be valid
- mode = 0o606
log_file = tmpdir.join("cloud-init.log")
log_file.ensure()
- log_file.chmod(mode)
+ # Use a mode that will never be made the default so this test will
+ # always be valid
+ log_file.chmod(0o606)
init._cfg = {"def_log_file": str(log_file)}
init._initialize_filesystem()
- assert mode == stat.S_IMODE(log_file.stat().mode)
+ assert 0o640 == stat.S_IMODE(log_file.stat().mode)
--
2.37.3