sos/SOURCES/sos-bz2138173-clean-host-be...

80 lines
3.1 KiB
Diff

From 21101d80610c43a7c00de3dfaa5ff043d1f8324a Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Thu, 27 Oct 2022 18:00:28 +0200
Subject: [PATCH] [cleaner] Apply compile_regexes after a regular parse line
The hostname parser mishandles strings like 'host.domain.com' when the
domain 'domain.com' already has a precompiled regex. The domain is
obfuscated first, and the subsequent _parse_line call then skips
obfuscating the host part.
Calling _parse_line before _parse_line_with_compiled_regexes obfuscates
both the host name and the domain name correctly.
Add a unit test with a reproducer.
Resolves: #3054
Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
---
sos/cleaner/parsers/hostname_parser.py | 19 +++++++++++++++++++
tests/unittests/cleaner_tests.py | 7 +++++++
2 files changed, 26 insertions(+)
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index debdf182..07eb40f6 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -8,6 +8,7 @@
#
# See the LICENSE file in the source distribution for further information.
+import re
from sos.cleaner.parsers import SoSCleanerParser
from sos.cleaner.mappings.hostname_map import SoSHostnameMap
@@ -29,6 +30,24 @@ class SoSHostnameParser(SoSCleanerParser):
self.load_short_names_from_mapping()
self.mapping.set_initial_counts()
+ def parse_line(self, line):
+ """This will be called for every line in every file we process, so that
+ every parser has a chance to scrub everything.
+
+ We are overriding parent method since we need to swap ordering of
+ _parse_line_with_compiled_regexes and _parse_line calls.
+ """
+ count = 0
+ for skip_pattern in self.skip_line_patterns:
+ if re.match(skip_pattern, line, re.I):
+ return line, count
+ line, _count = self._parse_line(line)
+ count += _count
+ if self.compile_regexes:
+ line, _rcount = self._parse_line_with_compiled_regexes(line)
+ count += _rcount
+ return line, count
+
def load_short_names_from_mapping(self):
"""When we load the mapping file into the hostname map, we have to do
some dancing to get those loaded properly into the "intermediate" dicts
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index d27481c1..9759b38a 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -171,6 +171,13 @@ class CleanerParserTests(unittest.TestCa
_test = self.host_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)
+ def test_obfuscate_whole_fqdn_for_given_domainname(self):
+ self.host_parser.load_hostname_into_map('sostestdomain.domain')
+ line = 'let obfuscate soshost.sostestdomain.domain'
+ _test = self.host_parser.parse_line(line)[0]
+ self.assertFalse('soshost' in _test)
+ self.assertFalse('sostestdomain' in _test)
+
def test_keyword_parser_valid_line(self):
line = 'this is my foobar test line'
_test = self.kw_parser.parse_line(line)[0]
--
2.37.3