From 21101d80610c43a7c00de3dfaa5ff043d1f8324a Mon Sep 17 00:00:00 2001
From: Pavel Moravec
Date: Thu, 27 Oct 2022 18:00:28 +0200
Subject: [PATCH] [cleaner] Apply compile_regexes after a regular parse line

Hostname parser treats strings like 'host.domain.com' with precompiled
domain 'domain.com' incorrectly. It first obfuscates the domain, and the
subsequent _parse_line then skips host obfuscation.

Calling _parse_line before _parse_line_with_compiled_regexes cleans
both the host name and the domain name correctly.

Add a unit test with a reproducer.

Resolves: #3054

Signed-off-by: Pavel Moravec
---
 sos/cleaner/parsers/hostname_parser.py | 27 +++++++++++++++++++++++++++
 tests/unittests/cleaner_tests.py       |  9 +++++++++
 2 files changed, 36 insertions(+)

diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index debdf182..07eb40f6 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -8,6 +8,7 @@
 #
 # See the LICENSE file in the source distribution for further information.
 
+import re
 from sos.cleaner.parsers import SoSCleanerParser
 from sos.cleaner.mappings.hostname_map import SoSHostnameMap
 
@@ -29,6 +30,32 @@ class SoSHostnameParser(SoSCleanerParser):
         self.load_short_names_from_mapping()
         self.mapping.set_initial_counts()
 
+    def parse_line(self, line):
+        """Scrub a single line; called for every line of every file processed.
+
+        This overrides the parent method in order to swap the call order:
+        _parse_line runs first and _parse_line_with_compiled_regexes second.
+        With the parent's ordering, a precompiled domain regex obfuscates the
+        domain part of an FQDN first, and _parse_line then skips obfuscating
+        the host part.
+
+        :param line: the line to scrub
+        :returns: ``(line, count)`` - the processed line and the sum of the
+                  counts reported by both parsing passes; the line is
+                  returned untouched with a count of 0 if it matches any of
+                  ``self.skip_line_patterns``
+        """
+        count = 0
+        for skip_pattern in self.skip_line_patterns:
+            if re.match(skip_pattern, line, re.I):
+                return line, count
+        line, _count = self._parse_line(line)
+        count += _count
+        if self.compile_regexes:
+            line, _rcount = self._parse_line_with_compiled_regexes(line)
+            count += _rcount
+        return line, count
+
     def load_short_names_from_mapping(self):
         """When we load the mapping file into the hostname map, we have to do
         some dancing to get those loaded properly into the "intermediate" dicts
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index d27481c1..9759b38a 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -171,6 +171,15 @@ class CleanerParserTests(unittest.TestCa
         _test = self.host_parser.parse_line(line)[0]
         self.assertNotEqual(line, _test)
 
+    def test_obfuscate_whole_fqdn_for_given_domainname(self):
+        # with only the domain loaded, both the host and the domain part of
+        # the FQDN must be obfuscated
+        self.host_parser.load_hostname_into_map('sostestdomain.domain')
+        line = 'let obfuscate soshost.sostestdomain.domain'
+        _test = self.host_parser.parse_line(line)[0]
+        self.assertNotIn('soshost', _test)
+        self.assertNotIn('sostestdomain', _test)
+
     def test_keyword_parser_valid_line(self):
         line = 'this is my foobar test line'
         _test = self.kw_parser.parse_line(line)[0]
-- 
2.37.3