244 lines
9.8 KiB
Diff
244 lines
9.8 KiB
Diff
|
From fc0218638f3e865c4315823e72aef2f46d012d07 Mon Sep 17 00:00:00 2001
|
||
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
||
|
Date: Wed, 14 Apr 2021 11:55:03 -0400
|
||
|
Subject: [PATCH 1/2] [clean] Load maps from all archives before obfuscation
|
||
|
loop
|
||
|
|
||
|
Previously, maps were being prepped via archives after extraction. This
|
||
|
reduced the amount of file IO being done, but made it so that necessary
|
||
|
obfuscations from later archives in a series would not be obfuscated in
|
||
|
the archives obfuscated before those later archives were extracted.
|
||
|
|
||
|
Fix this by extracting the map prep files into memory for each archive
|
||
|
to prep the maps before we enter the obfuscation loop entirely.
|
||
|
|
||
|
Closes: #2490
|
||
|
Related: RHBZ#1930181
|
||
|
Resolves: #2492
|
||
|
|
||
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
||
|
---
|
||
|
sos/cleaner/__init__.py | 69 +++++++++++++++-----------
|
||
|
sos/cleaner/parsers/username_parser.py | 13 +++--
|
||
|
2 files changed, 45 insertions(+), 37 deletions(-)
|
||
|
|
||
|
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
||
|
index b9eb61ef..d10cdc55 100644
|
||
|
--- a/sos/cleaner/__init__.py
|
||
|
+++ b/sos/cleaner/__init__.py
|
||
|
@@ -292,6 +292,7 @@ third party.
|
||
|
|
||
|
# we have at least one valid target to obfuscate
|
||
|
self.completed_reports = []
|
||
|
+ self.preload_all_archives_into_maps()
|
||
|
self.obfuscate_report_paths()
|
||
|
|
||
|
if not self.completed_reports:
|
||
|
@@ -473,6 +474,44 @@ third party.
|
||
|
self.ui_log.info("Exiting on user cancel")
|
||
|
os._exit(130)
|
||
|
|
||
|
+ def preload_all_archives_into_maps(self):
|
||
|
+ """Before doing the actual obfuscation, if we have multiple archives
|
||
|
+ to obfuscate then we need to preload each of them into the mappings
|
||
|
+ to ensure that node1 is obfuscated in node2 as well as node2 being
|
||
|
+ obfuscated in node1's archive.
|
||
|
+ """
|
||
|
+ self.log_info("Pre-loading multiple archives into obfuscation maps")
|
||
|
+ for _arc in self.report_paths:
|
||
|
+ is_dir = os.path.isdir(_arc)
|
||
|
+ if is_dir:
|
||
|
+ _arc_name = _arc
|
||
|
+ else:
|
||
|
+ archive = tarfile.open(_arc)
|
||
|
+ _arc_name = _arc.split('/')[-1].split('.tar')[0]
|
||
|
+ # for each parser, load the map_prep_file into memory, and then
|
||
|
+ # send that for obfuscation. We don't actually obfuscate the file
|
||
|
+ # here, do that in the normal archive loop
|
||
|
+ for _parser in self.parsers:
|
||
|
+ if not _parser.prep_map_file:
|
||
|
+ continue
|
||
|
+ _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
|
||
|
+ try:
|
||
|
+ if is_dir:
|
||
|
+ _pfile = open(_arc_path, 'r')
|
||
|
+ content = _pfile.read()
|
||
|
+ else:
|
||
|
+ _pfile = archive.extractfile(_arc_path)
|
||
|
+ content = _pfile.read().decode('utf-8')
|
||
|
+ _pfile.close()
|
||
|
+ if isinstance(_parser, SoSUsernameParser):
|
||
|
+ _parser.load_usernames_into_map(content)
|
||
|
+ for line in content.splitlines():
|
||
|
+ if isinstance(_parser, SoSHostnameParser):
|
||
|
+ _parser.load_hostname_into_map(line)
|
||
|
+ self.obfuscate_line(line, _parser.prep_map_file)
|
||
|
+ except Exception as err:
|
||
|
+ self.log_debug("Could not prep %s: %s" % (_arc_path, err))
|
||
|
+
|
||
|
def obfuscate_report(self, report):
|
||
|
"""Individually handle each archive or directory we've discovered by
|
||
|
running through each file therein.
|
||
|
@@ -493,7 +532,6 @@ third party.
|
||
|
start_time = datetime.now()
|
||
|
arc_md.add_field('start_time', start_time)
|
||
|
archive.extract()
|
||
|
- self.prep_maps_from_archive(archive)
|
||
|
archive.report_msg("Beginning obfuscation...")
|
||
|
|
||
|
file_list = archive.get_file_list()
|
||
|
@@ -542,35 +580,6 @@ third party.
|
||
|
self.ui_log.info("Exception while processing %s: %s"
|
||
|
% (report, err))
|
||
|
|
||
|
- def prep_maps_from_archive(self, archive):
|
||
|
- """Open specific files from an archive and try to load those values
|
||
|
- into our mappings before iterating through the entire archive.
|
||
|
-
|
||
|
- Positional arguments:
|
||
|
-
|
||
|
- :param archive SoSObfuscationArchive: An open archive object
|
||
|
- """
|
||
|
- for parser in self.parsers:
|
||
|
- if not parser.prep_map_file:
|
||
|
- continue
|
||
|
- prep_file = archive.get_file_path(parser.prep_map_file)
|
||
|
- if not prep_file:
|
||
|
- self.log_debug("Could not prepare %s: %s does not exist"
|
||
|
- % (parser.name, parser.prep_map_file),
|
||
|
- caller=archive.archive_name)
|
||
|
- continue
|
||
|
- # this is a bit clunky, but we need to load this particular
|
||
|
- # parser in a different way due to how hostnames are validated for
|
||
|
- # obfuscation
|
||
|
- if isinstance(parser, SoSHostnameParser):
|
||
|
- with open(prep_file, 'r') as host_file:
|
||
|
- hostname = host_file.readline().strip()
|
||
|
- parser.load_hostname_into_map(hostname)
|
||
|
- if isinstance(parser, SoSUsernameParser):
|
||
|
- parser.load_usernames_into_map(prep_file)
|
||
|
- self.obfuscate_file(prep_file, parser.prep_map_file,
|
||
|
- archive.archive_name)
|
||
|
-
|
||
|
def obfuscate_file(self, filename, short_name=None, arc_name=None):
|
||
|
"""Obfuscate and individual file, line by line.
|
||
|
|
||
|
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
|
||
|
index 5223c018..2bb6c7f3 100644
|
||
|
--- a/sos/cleaner/parsers/username_parser.py
|
||
|
+++ b/sos/cleaner/parsers/username_parser.py
|
||
|
@@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser):
|
||
|
super(SoSUsernameParser, self).__init__(conf_file)
|
||
|
self.mapping.load_names_from_options(opt_names)
|
||
|
|
||
|
- def load_usernames_into_map(self, fname):
|
||
|
+ def load_usernames_into_map(self, content):
|
||
|
"""Since we don't get the list of usernames from a straight regex for
|
||
|
this parser, we need to override the initial parser prepping here.
|
||
|
"""
|
||
|
- with open(fname, 'r') as lastfile:
|
||
|
- for line in lastfile.read().splitlines()[1:]:
|
||
|
- user = line.split()[0]
|
||
|
- if user in self.skip_list:
|
||
|
- continue
|
||
|
- self.mapping.get(user)
|
||
|
+ for line in content.splitlines()[1:]:
|
||
|
+ user = line.split()[0]
|
||
|
+ if user in self.skip_list:
|
||
|
+ continue
|
||
|
+ self.mapping.get(user)
|
||
|
|
||
|
def parse_line(self, line):
|
||
|
count = 0
|
||
|
--
|
||
|
2.26.3
|
||
|
|
||
|
|
||
|
From b713f458bfa92427147de754ea36054bfde53d71 Mon Sep 17 00:00:00 2001
|
||
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
||
|
Date: Wed, 14 Apr 2021 12:22:28 -0400
|
||
|
Subject: [PATCH 2/2] [clean] Remove duplicate file skipping within
|
||
|
obfuscate_line()
|
||
|
|
||
|
A redundant file skipping check was being executed within
|
||
|
`obfuscate_line()` that would cause subsequent archives being obfuscated
|
||
|
to skip line obfuscation within a file, despite iterating through the
|
||
|
entire file.
|
||
|
|
||
|
Remove this redundant check, thus allowing proper obfuscation.
|
||
|
|
||
|
Closes: #2490
|
||
|
Related: RHBZ#1930181
|
||
|
Resolves: #2492
|
||
|
|
||
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
||
|
---
|
||
|
sos/cleaner/__init__.py | 11 +++--------
|
||
|
sos/cleaner/obfuscation_archive.py | 2 --
|
||
|
2 files changed, 3 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
||
|
index d10cdc55..bdd24f95 100644
|
||
|
--- a/sos/cleaner/__init__.py
|
||
|
+++ b/sos/cleaner/__init__.py
|
||
|
@@ -508,7 +508,7 @@ third party.
|
||
|
for line in content.splitlines():
|
||
|
if isinstance(_parser, SoSHostnameParser):
|
||
|
_parser.load_hostname_into_map(line)
|
||
|
- self.obfuscate_line(line, _parser.prep_map_file)
|
||
|
+ self.obfuscate_line(line)
|
||
|
except Exception as err:
|
||
|
self.log_debug("Could not prep %s: %s" % (_arc_path, err))
|
||
|
|
||
|
@@ -606,7 +606,7 @@ third party.
|
||
|
if not line.strip():
|
||
|
continue
|
||
|
try:
|
||
|
- line, count = self.obfuscate_line(line, short_name)
|
||
|
+ line, count = self.obfuscate_line(line)
|
||
|
subs += count
|
||
|
tfile.write(line)
|
||
|
except Exception as err:
|
||
|
@@ -631,7 +631,7 @@ third party.
|
||
|
pass
|
||
|
return string_data
|
||
|
|
||
|
- def obfuscate_line(self, line, filename):
|
||
|
+ def obfuscate_line(self, line):
|
||
|
"""Run a line through each of the obfuscation parsers, keeping a
|
||
|
cumulative total of substitutions done on that particular line.
|
||
|
|
||
|
@@ -639,16 +639,11 @@ third party.
|
||
|
|
||
|
:param line str: The raw line as read from the file being
|
||
|
processed
|
||
|
- :param filename str: Filename the line was read from
|
||
|
|
||
|
Returns the fully obfuscated line and the number of substitutions made
|
||
|
"""
|
||
|
count = 0
|
||
|
for parser in self.parsers:
|
||
|
- if filename and any([
|
||
|
- re.match(_s, filename) for _s in parser.skip_files
|
||
|
- ]):
|
||
|
- continue
|
||
|
try:
|
||
|
line, _count = parser.parse_line(line)
|
||
|
count += _count
|
||
|
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
|
||
|
index 84ca30cd..c64ab13b 100644
|
||
|
--- a/sos/cleaner/obfuscation_archive.py
|
||
|
+++ b/sos/cleaner/obfuscation_archive.py
|
||
|
@@ -219,8 +219,6 @@ class SoSObfuscationArchive():
|
||
|
:param filename str: Filename relative to the extracted
|
||
|
archive root
|
||
|
"""
|
||
|
- if filename in self.file_sub_list:
|
||
|
- return True
|
||
|
|
||
|
if not os.path.isfile(self.get_file_path(filename)):
|
||
|
return True
|
||
|
--
|
||
|
2.26.3
|
||
|
|