From 1abc4e96638e819d3fbee74396b36a6ccaf0ab29 Mon Sep 17 00:00:00 2001 From: Matej Tyc Date: Tue, 3 Aug 2021 11:01:59 +0200 Subject: [PATCH] Refactor content identification Don't use the multiprocessing pool - it sometimes creates problems during its initialization: https://bugzilla.redhat.com/show_bug.cgi?id=1989441 --- org_fedora_oscap/content_handling.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/org_fedora_oscap/content_handling.py b/org_fedora_oscap/content_handling.py index f2af22f..65d5a28 100644 --- a/org_fedora_oscap/content_handling.py +++ b/org_fedora_oscap/content_handling.py @@ -111,9 +111,8 @@ def parse_HTML_from_content(content): def identify_files(fpaths): - with multiprocessing.Pool(os.cpu_count()) as p: - labels = p.map(get_doc_type, fpaths) - return {path: label for (path, label) in zip(fpaths, labels)} + result = {path: get_doc_type(path) for path in fpaths} + return result def get_doc_type(file_path): @@ -131,7 +130,9 @@ def get_doc_type(file_path): except UnicodeDecodeError: # 'oscap info' supplied weird output, which happens when it tries # to explain why it can't examine e.g. a JPG. - return None + pass + except Exception as e: + log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}") log.info("OSCAP addon: Identified {file_path} as {content_type}" .format(file_path=file_path, content_type=content_type)) return content_type