e5162f63d5
Resolves: bz2024892 bz2024893 bz2025611
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
From decd39b7799a0579ea085b0da0728b6eabd49b38 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 1 Sep 2021 00:28:58 -0400
Subject: [PATCH] [clean] Provide archive abstractions to obfuscate more than
 sos archives

This commit removes the restriction imposed on `sos clean` since its
introduction in sos-4.0 to only work against known sos report archives
or build directories. This is because there has been interest in using
the obfuscation bits of sos in other data-collector projects.

The `SoSObfuscationArchive()` class has been revamped to now be an
abstraction for different types of archives, and the cleaner logic has
been updated to leverage this new abstraction rather than assuming we're
working on an sos archive.

Abstractions are added for our own native use cases - that being `sos
report` and `sos collect` for at-runtime obfuscation, as well as
standalone archives previously generated. Further generic abstractions
are available for plain directories and tarballs; however, these will not
provide the same level of coverage as fully supported archive types, as
is noted in the manpage for sos-clean.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
man/en/sos-clean.1 | 25 ++
sos/cleaner/__init__.py | 308 +++++++++---------
.../__init__.py} | 80 ++++-
sos/cleaner/archives/generic.py | 52 +++
sos/cleaner/archives/sos.py | 106 ++++++
sos/cleaner/parsers/__init__.py | 6 -
sos/cleaner/parsers/hostname_parser.py | 1 -
sos/cleaner/parsers/ip_parser.py | 1 -
sos/cleaner/parsers/keyword_parser.py | 1 -
sos/cleaner/parsers/mac_parser.py | 1 -
sos/cleaner/parsers/username_parser.py | 8 -
tests/cleaner_tests/existing_archive.py | 7 +
tests/cleaner_tests/full_report_run.py | 3 +
tests/cleaner_tests/report_with_mask.py | 3 +
14 files changed, 423 insertions(+), 179 deletions(-)
rename sos/cleaner/{obfuscation_archive.py => archives/__init__.py} (81%)
create mode 100644 sos/cleaner/archives/generic.py
create mode 100644 sos/cleaner/archives/sos.py

diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index b77bc63c..54026713 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -10,6 +10,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
[\-\-jobs]
[\-\-no-update]
[\-\-keep-binary-files]
+ [\-\-archive-type]

.SH DESCRIPTION
\fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
@@ -88,6 +89,30 @@ Users should review any archive that keeps binary files in place before sending
a third party.

Default: False (remove encountered binary files)
+.TP
+.B \-\-archive-type TYPE
+Specify the type of archive that TARGET was generated as.
+When sos inspects a TARGET archive, it tries to identify what type of archive it is.
+For example, it may be a report generated by \fBsos report\fR, or a collection of those
+reports generated by \fBsos collect\fR, which require separate approaches.
+
+This option may be useful if a given TARGET archive is known to be of a specific type,
+but due to unknown reasons or some malformed/missing information in the archive directly,
+that is not properly identified by sos.
+
+The following are accepted values for this option:
+
+ \fBauto\fR Automatically detect the archive type
+ \fBreport\fR An archive generated by \fBsos report\fR
+ \fBcollect\fR An archive generated by \fBsos collect\fR
+
+The following may also be used, however note that these do not attempt to pre-load
+any information from the archives into the parsers. This means that, among other limitations,
+items like host and domain names may not be obfuscated unless an obfuscated mapping already exists
+on the system from a previous execution.
+
+ \fBdata-dir\fR A plain directory on the filesystem.
+ \fBtarball\fR A generic tar archive not associated with any known tool

.SH SEE ALSO
.BR sos (1)
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 6aadfe79..6d2eb483 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -12,9 +12,7 @@ import hashlib
import json
import logging
import os
-import re
import shutil
-import tarfile
import tempfile

from concurrent.futures import ThreadPoolExecutor
@@ -27,7 +25,10 @@ from sos.cleaner.parsers.mac_parser import SoSMacParser
from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
from sos.cleaner.parsers.username_parser import SoSUsernameParser
-from sos.cleaner.obfuscation_archive import SoSObfuscationArchive
+from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
+ SoSCollectorArchive,
+ SoSCollectorDirectory)
+from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
from sos.utilities import get_human_readable
from textwrap import fill

@@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent):
desc = "Obfuscate sensitive networking information in a report"

arg_defaults = {
+ 'archive_type': 'auto',
'domains': [],
'jobs': 4,
'keywords': [],
@@ -70,6 +72,7 @@ class SoSCleaner(SoSComponent):
self.from_cmdline = False
if not hasattr(self.opts, 'jobs'):
self.opts.jobs = 4
+ self.opts.archive_type = 'auto'
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
# create the tmp subdir here to avoid a potential race condition
@@ -92,6 +95,17 @@ class SoSCleaner(SoSComponent):
SoSUsernameParser(self.cleaner_mapping, self.opts.usernames)
]

+ self.archive_types = [
+ SoSReportDirectory,
+ SoSReportArchive,
+ SoSCollectorDirectory,
+ SoSCollectorArchive,
+ # make sure these two are always last as they are fallbacks
+ DataDirArchive,
+ TarballArchive
+ ]
+ self.nested_archive = None
+
self.log_info("Cleaner initialized. From cmdline: %s"
% self.from_cmdline)

@@ -178,6 +192,11 @@ third party.
)
clean_grp.add_argument('target', metavar='TARGET',
help='The directory or archive to obfuscate')
+ clean_grp.add_argument('--archive-type', default='auto',
+ choices=['auto', 'report', 'collect',
+ 'data-dir', 'tarball'],
+ help=('Specify what kind of archive the target '
+ 'was generated as'))
clean_grp.add_argument('--domains', action='extend', default=[],
help='List of domain names to obfuscate')
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
@@ -218,59 +237,28 @@ third party.

In the event the target path is not an archive, abort.
"""
- if not tarfile.is_tarfile(self.opts.target):
- self.ui_log.error(
- "Invalid target: must be directory or tar archive"
- )
- self._exit(1)
-
- archive = tarfile.open(self.opts.target)
- self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0]
-
- try:
- archive.getmember(os.path.join(self.arc_name, 'sos_logs'))
- except Exception:
- # this is not an sos archive
- self.ui_log.error("Invalid target: not an sos archive")
- self._exit(1)
-
- # see if there are archives within this archive
- nested_archives = []
- for _file in archive.getmembers():
- if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
- (_file.name.endswith(('.md5', '.sha256')))):
- nested_archives.append(_file.name.split('/')[-1])
-
- if nested_archives:
- self.log_info("Found nested archive(s), extracting top level")
- nested_path = self.extract_archive(archive)
- for arc_file in os.listdir(nested_path):
- if re.match('sosreport.*.tar.*', arc_file):
- if arc_file.endswith(('.md5', '.sha256')):
- continue
- self.report_paths.append(os.path.join(nested_path,
- arc_file))
- # add the toplevel extracted archive
- self.report_paths.append(nested_path)
+ _arc = None
+ if self.opts.archive_type != 'auto':
+ check_type = self.opts.archive_type.replace('-', '_')
+ for archive in self.archive_types:
+ if archive.type_name == check_type:
+ _arc = archive(self.opts.target, self.tmpdir)
else:
- self.report_paths.append(self.opts.target)
-
- archive.close()
-
- def extract_archive(self, archive):
- """Extract an archive into our tmpdir so that we may inspect it or
- iterate through its contents for obfuscation
-
- Positional arguments:
-
- :param archive: An open TarFile object for the archive
-
- """
- if not isinstance(archive, tarfile.TarFile):
- archive = tarfile.open(archive)
- path = os.path.join(self.tmpdir, 'cleaner')
- archive.extractall(path)
- return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
+ for arc in self.archive_types:
+ if arc.check_is_type(self.opts.target):
+ _arc = arc(self.opts.target, self.tmpdir)
+ break
+ if not _arc:
+ return
+ self.report_paths.append(_arc)
+ if _arc.is_nested:
+ self.report_paths.extend(_arc.get_nested_archives())
+ # We need to preserve the top level archive until all
+ # nested archives are processed
+ self.report_paths.remove(_arc)
+ self.nested_archive = _arc
+ if self.nested_archive:
+ self.nested_archive.ui_name = self.nested_archive.description

def execute(self):
"""SoSCleaner will begin by inspecting the TARGET option to determine
@@ -283,6 +271,7 @@ third party.
be unpacked, cleaned, and repacked and the final top-level archive will
then be repacked as well.
"""
+ self.arc_name = self.opts.target.split('/')[-1].split('.tar')[0]
if self.from_cmdline:
self.print_disclaimer()
self.report_paths = []
@@ -290,23 +279,11 @@ third party.
self.ui_log.error("Invalid target: no such file or directory %s"
% self.opts.target)
self._exit(1)
- if os.path.isdir(self.opts.target):
- self.arc_name = self.opts.target.split('/')[-1]
- for _file in os.listdir(self.opts.target):
- if _file == 'sos_logs':
- self.report_paths.append(self.opts.target)
- if (_file.startswith('sosreport') and
- (_file.endswith(".tar.gz") or _file.endswith(".tar.xz"))):
- self.report_paths.append(os.path.join(self.opts.target,
- _file))
- if not self.report_paths:
- self.ui_log.error("Invalid target: not an sos directory")
- self._exit(1)
- else:
- self.inspect_target_archive()
+
+ self.inspect_target_archive()

if not self.report_paths:
- self.ui_log.error("No valid sos archives or directories found\n")
+ self.ui_log.error("No valid archives or directories found\n")
self._exit(1)

# we have at least one valid target to obfuscate
@@ -334,33 +311,7 @@ third party.

final_path = None
if len(self.completed_reports) > 1:
- # we have an archive of archives, so repack the obfuscated tarball
- arc_name = self.arc_name + '-obfuscated'
- self.setup_archive(name=arc_name)
- for arc in self.completed_reports:
- if arc.is_tarfile:
- arc_dest = self.obfuscate_string(
- arc.final_archive_path.split('/')[-1]
- )
- self.archive.add_file(arc.final_archive_path,
- dest=arc_dest)
- checksum = self.get_new_checksum(arc.final_archive_path)
- if checksum is not None:
- dname = self.obfuscate_string(
- "checksums/%s.%s" % (arc_dest, self.hash_name)
- )
- self.archive.add_string(checksum, dest=dname)
- else:
- for dirname, dirs, files in os.walk(arc.archive_path):
- for filename in files:
- if filename.startswith('sosreport'):
- continue
- fname = os.path.join(dirname, filename)
- dnm = self.obfuscate_string(
- fname.split(arc.archive_name)[-1].lstrip('/')
- )
- self.archive.add_file(fname, dest=dnm)
- arc_path = self.archive.finalize(self.opts.compression_type)
+ arc_path = self.rebuild_nested_archive()
else:
arc = self.completed_reports[0]
arc_path = arc.final_archive_path
@@ -371,8 +322,7 @@ third party.
)
with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf:
cf.write(checksum)
-
- self.write_cleaner_log()
+ self.write_cleaner_log()

final_path = self.obfuscate_string(
os.path.join(self.sys_tmp, arc_path.split('/')[-1])
@@ -393,6 +343,30 @@ third party.

self.cleanup()

+ def rebuild_nested_archive(self):
+ """Handles repacking the nested tarball, now containing only obfuscated
+ copies of the reports, log files, manifest, etc...
+ """
+ # we have an archive of archives, so repack the obfuscated tarball
+ arc_name = self.arc_name + '-obfuscated'
+ self.setup_archive(name=arc_name)
+ for archive in self.completed_reports:
+ arc_dest = archive.final_archive_path.split('/')[-1]
+ checksum = self.get_new_checksum(archive.final_archive_path)
+ if checksum is not None:
+ dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
+ self.archive.add_string(checksum, dest=dname)
+ for dirn, dirs, files in os.walk(self.nested_archive.extracted_path):
+ for filename in files:
+ fname = os.path.join(dirn, filename)
+ dname = fname.split(self.nested_archive.extracted_path)[-1]
+ dname = dname.lstrip('/')
+ self.archive.add_file(fname, dest=dname)
+ # remove it now so we don't balloon our fs space needs
+ os.remove(fname)
+ self.write_cleaner_log(archive=True)
+ return self.archive.finalize(self.opts.compression_type)
+
def compile_mapping_dict(self):
"""Build a dict that contains each parser's map as a key, with the
contents as that key's value. This will then be written to disk in the
@@ -441,7 +415,7 @@ third party.
self.log_error("Could not update mapping config file: %s"
% err)

- def write_cleaner_log(self):
+ def write_cleaner_log(self, archive=False):
"""When invoked via the command line, the logging from SoSCleaner will
not be added to the archive(s) it processes, so we need to write it
separately to disk
@@ -454,6 +428,10 @@ third party.
for line in self.sos_log_file.readlines():
logfile.write(line)

+ if archive:
+ self.obfuscate_file(log_name)
+ self.archive.add_file(log_name, dest="sos_logs/cleaner.log")
+
def get_new_checksum(self, archive_path):
"""Calculate a new checksum for the obfuscated archive, as the previous
checksum will no longer be valid
@@ -481,11 +459,11 @@ third party.
be obfuscated concurrently.
"""
try:
- if len(self.report_paths) > 1:
- msg = ("Found %s total reports to obfuscate, processing up to "
- "%s concurrently\n"
- % (len(self.report_paths), self.opts.jobs))
- self.ui_log.info(msg)
+ msg = (
+ "Found %s total reports to obfuscate, processing up to %s "
+ "concurrently\n" % (len(self.report_paths), self.opts.jobs)
+ )
+ self.ui_log.info(msg)
if self.opts.keep_binary_files:
self.ui_log.warning(
"WARNING: binary files that potentially contain sensitive "
@@ -494,53 +472,67 @@ third party.
pool = ThreadPoolExecutor(self.opts.jobs)
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
pool.shutdown(wait=True)
+ # finally, obfuscate the nested archive if one exists
+ if self.nested_archive:
+ self._replace_obfuscated_archives()
+ self.obfuscate_report(self.nested_archive)
except KeyboardInterrupt:
self.ui_log.info("Exiting on user cancel")
os._exit(130)

+ def _replace_obfuscated_archives(self):
+ """When we have a nested archive, we need to rebuild the original
+ archive, which entails replacing the existing archives with their
+ obfuscated counterparts
+ """
+ for archive in self.completed_reports:
+ os.remove(archive.archive_path)
+ dest = self.nested_archive.extracted_path
+ tarball = archive.final_archive_path.split('/')[-1]
+ dest_name = os.path.join(dest, tarball)
+ shutil.move(archive.final_archive_path, dest)
+ archive.final_archive_path = dest_name
+
def preload_all_archives_into_maps(self):
"""Before doing the actual obfuscation, if we have multiple archives
to obfuscate then we need to preload each of them into the mappings
to ensure that node1 is obfuscated in node2 as well as node2 being
obfuscated in node1's archive.
"""
- self.log_info("Pre-loading multiple archives into obfuscation maps")
+ self.log_info("Pre-loading all archives into obfuscation maps")
for _arc in self.report_paths:
- is_dir = os.path.isdir(_arc)
- if is_dir:
- _arc_name = _arc
- else:
- archive = tarfile.open(_arc)
- _arc_name = _arc.split('/')[-1].split('.tar')[0]
- # for each parser, load the map_prep_file into memory, and then
- # send that for obfuscation. We don't actually obfuscate the file
- # here, do that in the normal archive loop
for _parser in self.parsers:
- if not _parser.prep_map_file:
+ try:
+ pfile = _arc.prep_files[_parser.name.lower().split()[0]]
+ if not pfile:
+ continue
+ except (IndexError, KeyError):
continue
- if isinstance(_parser.prep_map_file, str):
- _parser.prep_map_file = [_parser.prep_map_file]
- for parse_file in _parser.prep_map_file:
- _arc_path = os.path.join(_arc_name, parse_file)
+ if isinstance(pfile, str):
+ pfile = [pfile]
+ for parse_file in pfile:
+ self.log_debug("Attempting to load %s" % parse_file)
try:
- if is_dir:
- _pfile = open(_arc_path, 'r')
- content = _pfile.read()
- else:
- _pfile = archive.extractfile(_arc_path)
- content = _pfile.read().decode('utf-8')
- _pfile.close()
+ content = _arc.get_file_content(parse_file)
+ if not content:
+ continue
if isinstance(_parser, SoSUsernameParser):
_parser.load_usernames_into_map(content)
- for line in content.splitlines():
- if isinstance(_parser, SoSHostnameParser):
- _parser.load_hostname_into_map(line)
- self.obfuscate_line(line)
+ elif isinstance(_parser, SoSHostnameParser):
+ _parser.load_hostname_into_map(
+ content.splitlines()[0]
+ )
+ else:
+ for line in content.splitlines():
+ self.obfuscate_line(line)
except Exception as err:
- self.log_debug("Could not prep %s: %s"
- % (_arc_path, err))
+ self.log_info(
+ "Could not prepare %s from %s (archive: %s): %s"
+ % (_parser.name, parse_file, _arc.archive_name,
+ err)
+ )

- def obfuscate_report(self, report):
+ def obfuscate_report(self, archive):
"""Individually handle each archive or directory we've discovered by
running through each file therein.

@@ -549,17 +541,12 @@ third party.
:param report str: Filepath to the directory or archive
"""
try:
- if not os.access(report, os.W_OK):
- msg = "Insufficient permissions on %s" % report
- self.log_info(msg)
- self.ui_log.error(msg)
- return
-
- archive = SoSObfuscationArchive(report, self.tmpdir)
arc_md = self.cleaner_md.add_section(archive.archive_name)
start_time = datetime.now()
arc_md.add_field('start_time', start_time)
- archive.extract()
+ # don't double extract nested archives
+ if not archive.is_extracted:
+ archive.extract()
archive.report_msg("Beginning obfuscation...")

file_list = archive.get_file_list()
@@ -586,27 +573,28 @@ third party.
caller=archive.archive_name)

# if the archive was already a tarball, repack it
- method = archive.get_compression()
- if method:
- archive.report_msg("Re-compressing...")
- try:
- archive.rename_top_dir(
- self.obfuscate_string(archive.archive_name)
- )
- archive.compress(method)
- except Exception as err:
- self.log_debug("Archive %s failed to compress: %s"
- % (archive.archive_name, err))
- archive.report_msg("Failed to re-compress archive: %s"
- % err)
- return
+ if not archive.is_nested:
+ method = archive.get_compression()
+ if method:
+ archive.report_msg("Re-compressing...")
+ try:
+ archive.rename_top_dir(
+ self.obfuscate_string(archive.archive_name)
+ )
+ archive.compress(method)
+ except Exception as err:
+ self.log_debug("Archive %s failed to compress: %s"
+ % (archive.archive_name, err))
+ archive.report_msg("Failed to re-compress archive: %s"
+ % err)
+ return
+ self.completed_reports.append(archive)

end_time = datetime.now()
arc_md.add_field('end_time', end_time)
arc_md.add_field('run_time', end_time - start_time)
arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
arc_md.add_field('total_substitutions', archive.total_sub_count)
- self.completed_reports.append(archive)
rmsg = ''
if archive.removed_file_count:
rmsg = " [removed %s unprocessable files]"
@@ -615,7 +603,7 @@ third party.

except Exception as err:
self.ui_log.info("Exception while processing %s: %s"
- % (report, err))
+ % (archive.archive_name, err))

def obfuscate_file(self, filename, short_name=None, arc_name=None):
"""Obfuscate and individual file, line by line.
@@ -635,6 +623,8 @@ third party.
# the requested file doesn't exist in the archive
return
subs = 0
+ if not short_name:
+ short_name = filename.split('/')[-1]
if not os.path.islink(filename):
# don't run the obfuscation on the link, but on the actual file
# at some other point.
@@ -745,3 +735,5 @@ third party.
for parser in self.parsers:
_sec = parse_sec.add_section(parser.name.replace(' ', '_').lower())
_sec.add_field('entries', len(parser.mapping.dataset.keys()))
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/archives/__init__.py
similarity index 81%
rename from sos/cleaner/obfuscation_archive.py
rename to sos/cleaner/archives/__init__.py
index ea0b7012..795c5a78 100644
--- a/sos/cleaner/obfuscation_archive.py
+++ b/sos/cleaner/archives/__init__.py
@@ -40,6 +40,10 @@ class SoSObfuscationArchive():
file_sub_list = []
total_sub_count = 0
removed_file_count = 0
+ type_name = 'undetermined'
+ description = 'undetermined'
+ is_nested = False
+ prep_files = {}

def __init__(self, archive_path, tmpdir):
self.archive_path = archive_path
@@ -50,7 +54,43 @@ class SoSObfuscationArchive():
self.soslog = logging.getLogger('sos')
self.ui_log = logging.getLogger('sos_ui')
self.skip_list = self._load_skip_list()
- self.log_info("Loaded %s as an archive" % self.archive_path)
+ self.is_extracted = False
+ self._load_self()
+ self.archive_root = ''
+ self.log_info(
+ "Loaded %s as type %s"
+ % (self.archive_path, self.description)
+ )
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ """Check if the archive is a well-known type we directly support"""
+ return False
+
+ def _load_self(self):
+ if self.is_tarfile:
+ self.tarobj = tarfile.open(self.archive_path)
+
+ def get_nested_archives(self):
+ """Return a list of ObfuscationArchives that represent additional
+ archives found within the target archive. For example, an archive from
+ `sos collect` will return a list of ``SoSReportArchive`` objects.
+
+ This should be overridden by individual types of ObfuscationArchive's
+ """
+ return []
+
+ def get_archive_root(self):
+ """Set the root path for the archive that should be prepended to any
+ filenames given to methods in this class.
+ """
+ if self.is_tarfile:
+ toplevel = self.tarobj.firstmember
+ if toplevel.isdir():
+ return toplevel.name
+ else:
+ return os.sep
+ return os.path.abspath(self.archive_path)

def report_msg(self, msg):
"""Helper to easily format ui messages on a per-report basis"""
@@ -96,10 +136,42 @@ class SoSObfuscationArchive():
os.remove(full_fname)
self.removed_file_count += 1

- def extract(self):
+ def format_file_name(self, fname):
+ """Based on the type of archive we're dealing with, do whatever that
+ archive requires to a provided **relative** filepath to be able to
+ access it within the archive
+ """
+ if not self.is_extracted:
+ if not self.archive_root:
+ self.archive_root = self.get_archive_root()
+ return os.path.join(self.archive_root, fname)
+ else:
+ return os.path.join(self.extracted_path, fname)
+
+ def get_file_content(self, fname):
+ """Return the content from the specified fname. Particularly useful for
+ tarball-type archives so we can retrieve prep file contents prior to
+ extracting the entire archive
+ """
+ if self.is_extracted is False and self.is_tarfile:
+ filename = self.format_file_name(fname)
+ try:
+ return self.tarobj.extractfile(filename).read().decode('utf-8')
+ except KeyError:
+ self.log_debug(
+ "Unable to retrieve %s: no such file in archive" % fname
+ )
+ return ''
+ else:
+ with open(self.format_file_name(fname), 'r') as to_read:
+ return to_read.read()
+
+ def extract(self, quiet=False):
if self.is_tarfile:
- self.report_msg("Extracting...")
+ if not quiet:
+ self.report_msg("Extracting...")
self.extracted_path = self.extract_self()
+ self.is_extracted = True
else:
self.extracted_path = self.archive_path
# if we're running as non-root (e.g. collector), then we can have a
@@ -317,3 +389,5 @@ class SoSObfuscationArchive():
return False
except UnicodeDecodeError:
return True
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/archives/generic.py b/sos/cleaner/archives/generic.py
new file mode 100644
index 00000000..2ce6f09b
--- /dev/null
+++ b/sos/cleaner/archives/generic.py
@@ -0,0 +1,52 @@
+# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+from sos.cleaner.archives import SoSObfuscationArchive
+
+import os
+import tarfile
+
+
+class DataDirArchive(SoSObfuscationArchive):
+ """A plain directory on the filesystem that is not directly associated with
+ any known or supported collection utility
+ """
+
+ type_name = 'data_dir'
+ description = 'unassociated directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ return os.path.isdir(arc_path)
+
+ def set_archive_root(self):
+ return os.path.abspath(self.archive_path)
+
+
+class TarballArchive(SoSObfuscationArchive):
+ """A generic tar archive that is not associated with any known or supported
+ collection utility
+ """
+
+ type_name = 'tarball'
+ description = 'unassociated tarball'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return tarfile.is_tarfile(arc_path)
+ except Exception:
+ return False
+
+ def set_archive_root(self):
+ if self.tarobj.firstmember.isdir():
+ return self.tarobj.firstmember.name
+ return ''
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
new file mode 100644
index 00000000..4401d710
--- /dev/null
+++ b/sos/cleaner/archives/sos.py
@@ -0,0 +1,106 @@
+# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+from sos.cleaner.archives import SoSObfuscationArchive
+
+import os
+import tarfile
+
+
+class SoSReportArchive(SoSObfuscationArchive):
+ """This is the class representing an sos report, or in other words the
+ type the archive the SoS project natively generates
+ """
+
+ type_name = 'report'
+ description = 'sos report archive'
+ prep_files = {
+ 'hostname': 'sos_commands/host/hostname',
+ 'ip': 'sos_commands/networking/ip_-o_addr',
+ 'mac': 'sos_commands/networking/ip_-d_address',
+ 'username': [
+ 'sos_commands/login/lastlog_-u_1000-60000',
+ 'sos_commands/login/lastlog_-u_60001-65536',
+ 'sos_commands/login/lastlog_-u_65537-4294967295',
+ # AD users will be reported here, but favor the lastlog files since
+ # those will include local users who have not logged in
+ 'sos_commands/login/last'
+ ]
+ }
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return tarfile.is_tarfile(arc_path) and 'sosreport-' in arc_path
+ except Exception:
+ return False
+
+
+class SoSReportDirectory(SoSReportArchive):
+ """This is the archive class representing a build directory, or in other
+ words what `sos report --clean` will end up using for in-line obfuscation
+ """
+
+ type_name = 'report_dir'
+ description = 'sos report directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ if os.path.isdir(arc_path):
+ return 'sos_logs' in os.listdir(arc_path)
+ return False
+
+
+class SoSCollectorArchive(SoSObfuscationArchive):
+ """Archive class representing the tarball created by ``sos collect``. It
+ will not provide prep files on its own, however it will provide a list
+ of SoSReportArchive's which will then be used to prep the parsers
+ """
+
+ type_name = 'collect'
+ description = 'sos collect tarball'
+ is_nested = True
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return (tarfile.is_tarfile(arc_path) and 'sos-collect' in arc_path)
+ except Exception:
+ return False
+
+ def get_nested_archives(self):
+ self.extract(quiet=True)
+ _path = self.extracted_path
+ archives = []
+ for fname in os.listdir(_path):
+ arc_name = os.path.join(_path, fname)
+ if 'sosreport-' in fname and tarfile.is_tarfile(arc_name):
+ archives.append(SoSReportArchive(arc_name, self.tmpdir))
+ return archives
+
+
+class SoSCollectorDirectory(SoSCollectorArchive):
+ """The archive class representing the temp directory used by ``sos
+ collect`` when ``--clean`` is used during runtime.
+ """
+
+ type_name = 'collect_dir'
+ description = 'sos collect directory'
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ if os.path.isdir(arc_path):
+ for fname in os.listdir(arc_path):
+ if 'sos-collector-' in fname:
+ return True
+ return False
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index af6e375e..e62fd938 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -37,11 +37,6 @@ class SoSCleanerParser():
:cvar map_file_key: The key in the ``map_file`` to read when loading
previous obfuscation matches
:vartype map_file_key: ``str``
-
-
- :cvar prep_map_file: File to read from an archive to pre-seed the map with
- matches. E.G. ip_addr for loading IP addresses
- :vartype prep_map_fie: ``str``
"""

name = 'Undefined Parser'
@@ -49,7 +44,6 @@ class SoSCleanerParser():
skip_line_patterns = []
skip_files = []
map_file_key = 'unset'
- prep_map_file = []

def __init__(self, config={}):
if self.map_file_key in config:
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 71e13d3f..daa76a62 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -16,7 +16,6 @@ class SoSHostnameParser(SoSCleanerParser):

name = 'Hostname Parser'
map_file_key = 'hostname_map'
- prep_map_file = 'sos_commands/host/hostname'
regex_patterns = [
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
]
diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
index 525139e8..71d38be8 100644
--- a/sos/cleaner/parsers/ip_parser.py
+++ b/sos/cleaner/parsers/ip_parser.py
@@ -41,7 +41,6 @@ class SoSIPParser(SoSCleanerParser):
]

map_file_key = 'ip_map'
- prep_map_file = 'sos_commands/networking/ip_-o_addr'

def __init__(self, config):
self.mapping = SoSIPMap()
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
index 68de3727..694c6073 100644
--- a/sos/cleaner/parsers/keyword_parser.py
+++ b/sos/cleaner/parsers/keyword_parser.py
@@ -20,7 +20,6 @@ class SoSKeywordParser(SoSCleanerParser):

name = 'Keyword Parser'
map_file_key = 'keyword_map'
- prep_map_file = ''

def __init__(self, config, keywords=None, keyword_file=None):
self.mapping = SoSKeywordMap()
diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py
index 7ca80b8d..c74288cf 100644
--- a/sos/cleaner/parsers/mac_parser.py
+++ b/sos/cleaner/parsers/mac_parser.py
@@ -30,7 +30,6 @@ class SoSMacParser(SoSCleanerParser):
'534f:53'
)
map_file_key = 'mac_map'
- prep_map_file = 'sos_commands/networking/ip_-d_address'

def __init__(self, config):
self.mapping = SoSMacMap()
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index b142e371..35377a31 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -25,14 +25,6 @@ class SoSUsernameParser(SoSCleanerParser):

name = 'Username Parser'
map_file_key = 'username_map'
- prep_map_file = [
- 'sos_commands/login/lastlog_-u_1000-60000',
- 'sos_commands/login/lastlog_-u_60001-65536',
- 'sos_commands/login/lastlog_-u_65537-4294967295',
- # AD users will be reported here, but favor the lastlog files since
- # those will include local users who have not logged in
- 'sos_commands/login/last'
- ]
regex_patterns = []
skip_list = [
'core',
diff --git a/tests/cleaner_tests/existing_archive.py b/tests/cleaner_tests/existing_archive.py
index 0eaf6c8d..e13d1cae 100644
--- a/tests/cleaner_tests/existing_archive.py
+++ b/tests/cleaner_tests/existing_archive.py
@@ -28,6 +28,13 @@ class ExistingArchiveCleanTest(StageTwoReportTest):
def test_obfuscation_log_created(self):
self.assertFileExists(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE))

+ def test_archive_type_correct(self):
+ with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
+ for line in log:
+ if "Loaded %s" % ARCHIVE in line:
+ assert 'as type sos report archive' in line, "Incorrect archive type detected: %s" % line
+ break
+
def test_from_cmdline_logged(self):
with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
for line in log:
diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
index 3b28e7a2..2de54946 100644
--- a/tests/cleaner_tests/full_report_run.py
+++ b/tests/cleaner_tests/full_report_run.py
@@ -35,6 +35,9 @@ class FullCleanTest(StageTwoReportTest):
def test_tarball_named_obfuscated(self):
self.assertTrue('obfuscated' in self.archive)

+ def test_archive_type_correct(self):
+ self.assertSosLogContains('Loaded .* as type sos report directory')
+
def test_hostname_not_in_any_file(self):
host = self.sysinfo['pre']['networking']['hostname']
# much faster to just use grep here
diff --git a/tests/cleaner_tests/report_with_mask.py b/tests/cleaner_tests/report_with_mask.py
index 4f94ba33..08e873d4 100644
--- a/tests/cleaner_tests/report_with_mask.py
+++ b/tests/cleaner_tests/report_with_mask.py
@@ -31,6 +31,9 @@ class ReportWithMask(StageOneReportTest):
def test_tarball_named_obfuscated(self):
self.assertTrue('obfuscated' in self.archive)

+ def test_archive_type_correct(self):
+ self.assertSosLogContains('Loaded .* as type sos report directory')
+
def test_localhost_was_obfuscated(self):
self.assertFileHasContent('/etc/hostname', 'host0')

--
2.31.1
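
Illustration (reviewer addition, not part of the patch set): with the abstraction above, a
third-party data collector only needs to subclass SoSObfuscationArchive and register the class
in SoSCleaner.archive_types for `sos clean` to recognize its archives. The sketch below is
hypothetical (the class name, type_name and prep file paths are invented) and simply mirrors
the pattern of the in-tree types under sos/cleaner/archives/:

    import tarfile

    from sos.cleaner.archives import SoSObfuscationArchive


    class AcmeCollectorArchive(SoSObfuscationArchive):
        """Hypothetical archive type for tarballs made by an 'acme-collect' tool."""

        type_name = 'acme'
        description = 'acme-collect tarball'

        # files used to pre-seed the parsers' maps, relative to the archive root;
        # keys match the first word of each parser's name (hostname, ip, mac, ...)
        prep_files = {
            'hostname': 'acme_data/hostname',
            'ip': 'acme_data/ip_addr'
        }

        @classmethod
        def check_is_type(cls, arc_path):
            # keep this check cheap; it runs against every candidate target
            try:
                return tarfile.is_tarfile(arc_path) and 'acme-collect-' in arc_path
            except Exception:
                return False

Until such a class is registered, an unknown tarball still falls back to the generic
TarballArchive type (or can be forced with `--archive-type tarball`), just without any map
pre-seeding.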

From 9b119f860eaec089f7ef884ff39c42589a662994 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 1 Sep 2021 00:34:04 -0400
Subject: [PATCH] [hostname_map] Add a catch for single-character hostnames

If a log file was truncated at a specific boundary in a string of the
FQDN of the host such that we only get a couple of characters before the
rest of the domain, we would previously boldly replace all instances of
those characters with the obfuscated short name; not very helpful.

Don't sanitize the short name if this happens; instead, obfuscate the
whole FQDN as 'unknown.example.com'.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
sos/cleaner/mappings/hostname_map.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index d4b2c88e..e70a5530 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -184,7 +184,14 @@ class SoSHostnameMap(SoSMap):
hostname = host[0]
domain = host[1:]
# obfuscate the short name
- ob_hostname = self.sanitize_short_name(hostname)
+ if len(hostname) > 2:
+ ob_hostname = self.sanitize_short_name(hostname)
+ else:
+ # by best practice it appears the host part of the fqdn was cut
+ # off due to some form of truncating, as such don't obfuscate
+ # short strings that are likely to throw off obfuscation of
+ # unrelated bits and paths
+ ob_hostname = 'unknown'
ob_domain = self.sanitize_domain(domain)
self.dataset[item] = ob_domain
return '.'.join([ob_hostname, ob_domain])
--
2.31.1
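
Illustration (reviewer addition, not part of the patch): a standalone sketch of the failure mode
this guards against, using made-up strings. If truncation leaves something like 'st.example.com'
in a log, treating 'st' as a real short name and blindly replacing it mangles unrelated text,
which is why the map now falls back to 'unknown' for host parts of one or two characters:

    line = "restarting stunnel on st.example.com"
    # old behaviour: obfuscate the 2-character "short name" everywhere it appears
    print(line.replace("st", "host0"))
    # -> rehost0arting host0unnel on host0.example.com
    # new behaviour: leave the short part alone, obfuscate the FQDN as a whole
    print(line.replace("st.example.com", "unknown.example.com"))
    # -> restarting stunnel on unknown.example.com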

From f3f3e763d7c31b7b7cafdf8dd4dab87056fb7696 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 1 Sep 2021 15:54:55 -0400
Subject: [PATCH] [cleaner] Add support for Insights client archives

Adds a new type of `SoSObfuscationArchive` to support obfuscating
archives generated by the Insights project.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
man/en/sos-clean.1 | 1 +
sos/cleaner/__init__.py | 4 ++-
sos/cleaner/archives/insights.py | 42 ++++++++++++++++++++++++++++++++
3 files changed, 46 insertions(+), 1 deletion(-)
create mode 100644 sos/cleaner/archives/insights.py

diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
index 54026713..358ec0cb 100644
--- a/man/en/sos-clean.1
+++ b/man/en/sos-clean.1
@@ -105,6 +105,7 @@ The following are accepted values for this option:
\fBauto\fR Automatically detect the archive type
\fBreport\fR An archive generated by \fBsos report\fR
\fBcollect\fR An archive generated by \fBsos collect\fR
+ \fBinsights\fR An archive generated by the \fBinsights-client\fR package

The following may also be used, however note that these do not attempt to pre-load
any information from the archives into the parsers. This means that, among other limitations,
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 6d2eb483..3e08aa28 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -29,6 +29,7 @@ from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
SoSCollectorArchive,
SoSCollectorDirectory)
from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
+from sos.cleaner.archives.insights import InsightsArchive
from sos.utilities import get_human_readable
from textwrap import fill

@@ -100,6 +101,7 @@ class SoSCleaner(SoSComponent):
SoSReportArchive,
SoSCollectorDirectory,
SoSCollectorArchive,
+ InsightsArchive,
# make sure these two are always last as they are fallbacks
DataDirArchive,
TarballArchive
@@ -194,7 +196,7 @@ third party.
help='The directory or archive to obfuscate')
clean_grp.add_argument('--archive-type', default='auto',
choices=['auto', 'report', 'collect',
- 'data-dir', 'tarball'],
+ 'insights', 'data-dir', 'tarball'],
help=('Specify what kind of archive the target '
'was generated as'))
clean_grp.add_argument('--domains', action='extend', default=[],
diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py
new file mode 100644
index 00000000..dab48b16
--- /dev/null
+++ b/sos/cleaner/archives/insights.py
@@ -0,0 +1,42 @@
+# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+from sos.cleaner.archives import SoSObfuscationArchive
+
+import tarfile
+
+
+class InsightsArchive(SoSObfuscationArchive):
+ """This class represents archives generated by the insights-client utility
+ for RHEL systems.
+ """
+
+ type_name = 'insights'
+ description = 'insights-client archive'
+
+ prep_files = {
+ 'hostname': 'data/insights_commands/hostname_-f',
+ 'ip': 'data/insights_commands/ip_addr',
+ 'mac': 'data/insights_commands/ip_addr'
+ }
+
+ @classmethod
+ def check_is_type(cls, arc_path):
+ try:
+ return tarfile.is_tarfile(arc_path) and 'insights-' in arc_path
+ except Exception:
+ return False
+
+ def get_archive_root(self):
+ top = self.archive_path.split('/')[-1].split('.tar')[0]
+ if self.tarobj.firstmember.name == '.':
+ top = './' + top
+ return top
--
2.31.1
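
Illustration (reviewer addition, not part of the patch): with InsightsArchive registered, the
auto-detection loop in SoSCleaner.inspect_target_archive() can classify an insights-client
tarball before reaching the generic fallbacks. A rough sketch of that ordering (directory
variants omitted, file name purely illustrative):

    from sos.cleaner.archives.sos import SoSReportArchive, SoSCollectorArchive
    from sos.cleaner.archives.insights import InsightsArchive
    from sos.cleaner.archives.generic import DataDirArchive, TarballArchive

    # specific types first, generic fallbacks last, as in SoSCleaner.archive_types
    archive_types = [SoSReportArchive, SoSCollectorArchive, InsightsArchive,
                     DataDirArchive, TarballArchive]

    def detect(target):
        for klass in archive_types:
            if klass.check_is_type(target):
                return klass
        return None

    # detect('/var/tmp/insights-host01-20211117120000.tar.gz') -> InsightsArchive,
    # while `sos clean --archive-type insights TARGET` bypasses detection entirely.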

From 9639dc3d240076b55f2a1d04b43ea42bebd09215 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 16 Nov 2021 17:50:42 -0500
Subject: [PATCH] [clean,hostname_parser] Source /etc/hosts for obfuscation

Up until now, our sourcing of hostnames/domains for obfuscation has been
dependent upon the output of the `hostname` command. However, some
scenarios have come up where sourcing `/etc/hosts` is advantageous for
several reasons:

First, if `hostname` output is unavailable, this provides a fallback
measure.

Second, `/etc/hosts` is a common place to have short names defined which
would otherwise not be detected (or at the very least would only be
caught if the short name could be gleaned elsewhere from an FQDN, a race
condition), thus leaving the potential for unobfuscated data in an
archive.

Due to both the nature of hostname obfuscation and the malleable syntax
of `/etc/hosts`, the parsing of this file needs special handling not
covered by our more generic parsing and obfuscation methods.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
sos/cleaner/__init__.py | 11 ++++++++---
sos/cleaner/archives/sos.py | 5 ++++-
sos/cleaner/parsers/hostname_parser.py | 19 +++++++++++++++++++
3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index ed461a8f..3f530d44 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -523,9 +523,14 @@ third party.
if isinstance(_parser, SoSUsernameParser):
_parser.load_usernames_into_map(content)
elif isinstance(_parser, SoSHostnameParser):
- _parser.load_hostname_into_map(
- content.splitlines()[0]
- )
+ if 'hostname' in parse_file:
+ _parser.load_hostname_into_map(
+ content.splitlines()[0]
+ )
+ elif 'etc/hosts' in parse_file:
+ _parser.load_hostname_from_etc_hosts(
+ content
+ )
else:
for line in content.splitlines():
self.obfuscate_line(line)
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
index 4401d710..f8720c88 100644
--- a/sos/cleaner/archives/sos.py
+++ b/sos/cleaner/archives/sos.py
@@ -23,7 +23,10 @@ class SoSReportArchive(SoSObfuscationArchive):
type_name = 'report'
description = 'sos report archive'
prep_files = {
- 'hostname': 'sos_commands/host/hostname',
+ 'hostname': [
+ 'sos_commands/host/hostname',
+ 'etc/hosts'
+ ],
'ip': 'sos_commands/networking/ip_-o_addr',
'mac': 'sos_commands/networking/ip_-d_address',
'username': [
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index daa76a62..0a733bee 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -61,6 +61,25 @@ class SoSHostnameParser(SoSCleanerParser):
self.mapping.add(high_domain)
self.mapping.add(hostname_string)

+ def load_hostname_from_etc_hosts(self, content):
+ """Parse an archive's copy of /etc/hosts, which requires handling that
+ is separate from the output of the `hostname` command. Just like
+ load_hostname_into_map(), this has to be done explicitly and we
+ cannot rely upon the more generic methods to do this reliably.
+ """
+ lines = content.splitlines()
+ for line in lines:
+ if line.startswith('#') or 'localhost' in line:
+ continue
+ hostln = line.split()[1:]
+ for host in hostln:
+ if len(host.split('.')) == 1:
+ # only generate a mapping for fqdns but still record the
+ # short name here for later obfuscation with parse_line()
+ self.short_names.append(host)
+ else:
+ self.mapping.add(host)
+
def parse_line(self, line):
"""Override the default parse_line() method to also check for the
shortname of the host derived from the hostname.
--
2.31.1
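
Illustration (reviewer addition, not part of the patch): the /etc/hosts handling added above
follows a few simple rules: comment lines and localhost entries are skipped, bare short names
are only recorded for later replacement by parse_line(), and FQDNs go straight into the map. A
standalone approximation with a made-up hosts file:

    sample = [
        "# static entries",
        "127.0.0.1   localhost",
        "10.0.0.5    node1.example.com node1",
        "10.0.0.6    db01",
    ]

    short_names, fqdns = [], []
    for line in sample:
        if not line.split() or line.startswith('#') or 'localhost' in line:
            continue
        for host in line.split()[1:]:
            # a single label is a short name; anything with dots is an FQDN
            (short_names if len(host.split('.')) == 1 else fqdns).append(host)

    print(short_names)  # ['node1', 'db01']
    print(fqdns)        # ['node1.example.com']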

From c1680226b53452b18f27f2e76c3e0e03e521f935 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 17 Nov 2021 13:11:33 -0500
Subject: [PATCH] [clean, hostname] Fix unintentionally case sensitive
shortname handling

It was discovered that our extra handling for shortnames was
unintentionally case sensitive. Fix this to ensure that shortnames are
obfuscated regardless of case in all collected text.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
sos/cleaner/mappings/hostname_map.py | 6 +++---
sos/cleaner/parsers/hostname_parser.py | 8 +++++---
tests/cleaner_tests/full_report_run.py | 21 ++++++++++++++++++++-
3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index e70a5530..0fe78fb1 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -169,13 +169,13 @@ class SoSHostnameMap(SoSMap):

def sanitize_item(self, item):
host = item.split('.')
- if all([h.isupper() for h in host]):
+ if len(host) > 1 and all([h.isupper() for h in host]):
# by convention we have just a domain
_host = [h.lower() for h in host]
return self.sanitize_domain(_host).upper()
if len(host) == 1:
# we have a shortname for a host
- return self.sanitize_short_name(host[0])
+ return self.sanitize_short_name(host[0].lower())
if len(host) == 2:
# we have just a domain name, e.g. example.com
return self.sanitize_domain(host)
@@ -185,7 +185,7 @@ class SoSHostnameMap(SoSMap):
domain = host[1:]
# obfuscate the short name
if len(hostname) > 2:
- ob_hostname = self.sanitize_short_name(hostname)
+ ob_hostname = self.sanitize_short_name(hostname.lower())
else:
# by best practice it appears the host part of the fqdn was cut
# off due to some form of truncating, as such don't obfuscate
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 0a733bee..7fd0e698 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -8,6 +8,8 @@
#
# See the LICENSE file in the source distribution for further information.

+import re
+
from sos.cleaner.parsers import SoSCleanerParser
from sos.cleaner.mappings.hostname_map import SoSHostnameMap

@@ -91,9 +93,9 @@ class SoSHostnameParser(SoSCleanerParser):
"""
if search in self.mapping.skip_keys:
return ln, count
- if search in ln:
- count += ln.count(search)
- ln = ln.replace(search, self.mapping.get(repl or search))
+ _reg = re.compile(search, re.I)
+ if _reg.search(ln):
+ return _reg.subn(self.mapping.get(repl or search), ln)
return ln, count

count = 0
diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
index 2de54946..0b23acaf 100644
--- a/tests/cleaner_tests/full_report_run.py
+++ b/tests/cleaner_tests/full_report_run.py
@@ -26,6 +26,24 @@ class FullCleanTest(StageTwoReportTest):
# replace with an empty placeholder, make sure that this test case is not
# influenced by previous clean runs
files = ['/etc/sos/cleaner/default_mapping']
+ packages = {
+ 'rhel': ['python3-systemd'],
+ 'ubuntu': ['python3-systemd']
+ }
+
+ def pre_sos_setup(self):
+ # ensure that case-insensitive matching of FQDNs and shortnames work
+ from systemd import journal
+ from socket import gethostname
+ host = gethostname()
+ short = host.split('.')[0]
+ sosfd = journal.stream('sos-testing')
+ sosfd.write(
+ "This is a test line from sos clean testing. The hostname %s "
+ "should not appear, nor should %s in an obfuscated archive. The "
+ "shortnames of %s and %s should also not appear."
+ % (host.lower(), host.upper(), short.lower(), short.upper())
+ )

def test_private_map_was_generated(self):
self.assertOutputContains('A mapping of obfuscated elements is available at')
@@ -40,8 +58,9 @@ class FullCleanTest(StageTwoReportTest):

def test_hostname_not_in_any_file(self):
host = self.sysinfo['pre']['networking']['hostname']
+ short = host.split('.')[0]
# much faster to just use grep here
- content = self.grep_for_content(host)
+ content = self.grep_for_content(host) + self.grep_for_content(short)
if not content:
assert True
else:
--
2.31.1
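
Illustration (reviewer addition, not part of the patch): the switch from str.replace() to a
compiled pattern with re.I is what picks up mixed-case occurrences. A quick standalone check
with made-up names:

    import re

    line = "Connected to NODE1 (node1.example.com); see Node1 logs"
    _reg = re.compile('node1', re.I)
    print(_reg.subn('host0', line))
    # -> ('Connected to host0 (host0.example.com); see host0 logs', 3)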

From aaeb8cb57ed55598ab744b96d4f127aedebcb292 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 21 Sep 2021 15:23:20 -0400
Subject: [PATCH] [build] Add archives to setup.py packages

Adds the newly abstracted `sos.cleaner.archives` package to `setup.py`
so that manual builds will properly include it.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1e8d8e2dc5..7653b59de3 100644
--- a/setup.py
+++ b/setup.py
@@ -102,7 +102,7 @@ def copy_file (self, filename, dirname):
'sos.policies.package_managers', 'sos.policies.init_systems',
'sos.report', 'sos.report.plugins', 'sos.collector',
'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings',
- 'sos.cleaner.parsers'
+ 'sos.cleaner.parsers', 'sos.cleaner.archives'
],
cmdclass=cmdclass,
command_options=command_options,