From e2c47422b0ecfd561a8fe203b53e4a3831ae0ff7 Mon Sep 17 00:00:00 2001
From: Matej Tyc <matyc@redhat.com>
Date: Tue, 22 Nov 2022 11:45:11 +0100
Subject: [PATCH 1/3] Fix handling of content paths

Archives and ready-to-use content use paths differently.

Archives get unpacked into a directory, where they need to be unpacked,
analyzed, and cross-checked with e.g. the supplied content path,
whereas ready-to-use content can be used directly.

As the current codebase doesn't untangle all possible ways how to obtain
existing content in a way of decomposing those into layers, this change
just makes the current code working at the expense of making it worse to
maintain.
---
 org_fedora_oscap/content_discovery.py | 34 ++++++++++++++++++---------
 org_fedora_oscap/service/kickstart.py | 10 +++++++-
 tests/test_content_discovery.py       | 21 +++++++++++++++++
 3 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/org_fedora_oscap/content_discovery.py b/org_fedora_oscap/content_discovery.py
index 4235af7..ebef618 100644
--- a/org_fedora_oscap/content_discovery.py
+++ b/org_fedora_oscap/content_discovery.py
@@ -46,6 +46,14 @@ def clear_all(data):
     data.dry_run = False
 
 
+def path_is_present_among_paths(path, paths):
+    absolute_path = os.path.abspath(path)
+    for second_path in paths:
+        if absolute_path == os.path.abspath(second_path):
+            return True
+    return False
+
+
 class ContentBringer:
     CONTENT_DOWNLOAD_LOCATION = pathlib.Path(common.INSTALLATION_CONTENT_DIR)
     DEFAULT_SSG_DATA_STREAM_PATH = f"{common.SSG_DIR}/{common.SSG_CONTENT}"
@@ -194,7 +202,7 @@ def _verify_fingerprint(self, dest_filename, fingerprint=""):
         log.info(f"Integrity check passed using {hash_obj.name} hash")
 
     def allow_one_expected_tailoring_or_no_tailoring(self, labelled_files):
-        expected_tailoring = self._addon_data.tailoring_path
+        expected_tailoring = common.get_preinst_tailoring_path(self._addon_data)
         tailoring_label = CONTENT_TYPES["TAILORING"]
         if expected_tailoring:
             labelled_files = self.reduce_files(labelled_files, expected_tailoring, [tailoring_label])
@@ -206,7 +214,7 @@ def allow_one_expected_tailoring_or_no_tailoring(self, labelled_files):
         return labelled_files
 
     def filter_discovered_content(self, labelled_files):
-        expected_path = self._addon_data.content_path
+        expected_path = common.get_preinst_content_path(self._addon_data)
         categories = (CONTENT_TYPES["DATASTREAM"], CONTENT_TYPES["XCCDF_CHECKLIST"])
         if expected_path:
             labelled_files = self.reduce_files(labelled_files, expected_path, categories)
@@ -222,7 +230,7 @@ def filter_discovered_content(self, labelled_files):
 
     def reduce_files(self, labelled_files, expected_path, categories):
         reduced_files = dict()
-        if expected_path not in labelled_files:
+        if not path_is_present_among_paths(expected_path, labelled_files.keys()):
             msg = (
                 f"Expected a file {expected_path} to be part of the supplied content, "
                 f"but it was not the case, got only {list(labelled_files.keys())}"
@@ -253,13 +261,9 @@ def _finish_actual_fetch(self, wait_for, fingerprint, report_callback, dest_file
             structured_content.add_content_archive(dest_filename)
 
         labelled_filenames = content_handling.identify_files(fpaths)
-        labelled_relative_filenames = {
-            os.path.relpath(path, self.CONTENT_DOWNLOAD_LOCATION): label
-            for path, label in labelled_filenames.items()}
-        labelled_relative_filenames = self.filter_discovered_content(labelled_relative_filenames)
+        labelled_filenames = self.filter_discovered_content(labelled_filenames)
 
-        for rel_fname, label in labelled_relative_filenames.items():
-            fname = self.CONTENT_DOWNLOAD_LOCATION / rel_fname
+        for fname, label in labelled_filenames.items():
             structured_content.add_file(str(fname), label)
 
         if fingerprint and dest_filename:
@@ -303,11 +307,18 @@ def use_downloaded_content(self, content):
         # We know that we have ended up with a datastream-like content,
         # but if we can't convert an archive to a datastream.
         # self._addon_data.content_type = "datastream"
-        self._addon_data.content_path = str(preferred_content.relative_to(content.root))
+        content_type = self._addon_data.content_type
+        if content_type in ("archive", "rpm"):
+            self._addon_data.content_path = str(preferred_content.relative_to(content.root))
+        else:
+            self._addon_data.content_path = str(preferred_content)
 
         preferred_tailoring = self.get_preferred_tailoring(content)
         if content.tailoring:
-            self._addon_data.tailoring_path = str(preferred_tailoring.relative_to(content.root))
+            if content_type in ("archive", "rpm"):
+                self._addon_data.tailoring_path = str(preferred_tailoring.relative_to(content.root))
+            else:
+                self._addon_data.tailoring_path = str(preferred_tailoring)
 
     def use_system_content(self, content=None):
         clear_all(self._addon_data)
@@ -403,6 +414,7 @@ def _xccdf_content(self):
 
     def find_expected_usable_content(self, relative_expected_content_path):
         content_path = self.root / relative_expected_content_path
+        content_path = content_path.resolve()
         eligible_main_content = (self._datastream_content(), self._xccdf_content())
 
         if content_path in eligible_main_content:
diff --git a/org_fedora_oscap/service/kickstart.py b/org_fedora_oscap/service/kickstart.py
index ce049d1..6698978 100644
--- a/org_fedora_oscap/service/kickstart.py
+++ b/org_fedora_oscap/service/kickstart.py
@@ -17,6 +17,7 @@
 #
 import logging
 import re
+import os
 
 from pyanaconda.core.kickstart import KickstartSpecification
 from pyanaconda.core.kickstart.addon import AddonData
@@ -146,7 +147,14 @@ def _parse_profile_id(self, value):
         self.policy_data.profile_id = value
 
     def _parse_content_path(self, value):
-        # need to be checked?
+        absolute_content_path_in_archive_like_file = (
+            self.policy_data.content_type in ("archive", "rpm")
+            and os.path.isabs(value))
+        if absolute_content_path_in_archive_like_file:
+            msg = (
+                "When using archives-like content input, the corresponding content path "
+                "has to be relative, but got '{value}'.")
+            raise KickstartValueError(msg)
         self.policy_data.content_path = value
 
     def _parse_cpe_path(self, value):
diff --git a/tests/test_content_discovery.py b/tests/test_content_discovery.py
index 5463c9a..d6e14d9 100644
--- a/tests/test_content_discovery.py
+++ b/tests/test_content_discovery.py
@@ -1,3 +1,5 @@
+import os
+
 import pytest
 
 import org_fedora_oscap.content_discovery as tested_module
@@ -46,3 +48,22 @@ def test_reduce(labelled_files):
 
     reduced = bringer.reduce_files(labelled_files, "cpe", ["C"])
     assert reduced == labelled_files
+
+
+def test_path_presence_detection():
+    list_of_paths = ["file1", os.path.abspath("file2"), os.path.abspath("dir///file3")]
+
+    list_of_paths_in_list = [
+        "file1", os.path.abspath("file1"), "./file1",
+        "file2", "dir/..//file2",
+        "dir/../dir/file3", "dir/file3",
+    ]
+    list_of_paths_not_in_list = [
+        "../file1", "file3"
+    ]
+
+    for path in list_of_paths_in_list:
+        assert tested_module.path_is_present_among_paths(path, list_of_paths)
+
+    for path in list_of_paths_not_in_list:
+        assert not tested_module.path_is_present_among_paths(path, list_of_paths)

From 9808e21ff4e6a4ce878d556f26cfddede04c870f Mon Sep 17 00:00:00 2001
From: Matej Tyc <matyc@redhat.com>
Date: Wed, 16 Nov 2022 15:35:09 +0100
Subject: [PATCH 2/3] Compare paths according to their equivalence

not according their arbitrary string form
---
 org_fedora_oscap/content_discovery.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/org_fedora_oscap/content_discovery.py b/org_fedora_oscap/content_discovery.py
index ebef618..9da44e7 100644
--- a/org_fedora_oscap/content_discovery.py
+++ b/org_fedora_oscap/content_discovery.py
@@ -46,10 +46,14 @@ def clear_all(data):
     data.dry_run = False
 
 
+def paths_are_equivalent(p1, p2):
+    return os.path.abspath(p1) == os.path.abspath(p2)
+
+
 def path_is_present_among_paths(path, paths):
     absolute_path = os.path.abspath(path)
     for second_path in paths:
-        if absolute_path == os.path.abspath(second_path):
+        if paths_are_equivalent(path, second_path):
             return True
     return False
 
@@ -237,7 +241,7 @@ def reduce_files(self, labelled_files, expected_path, categories):
             )
             raise RuntimeError(msg)
         for path, label in labelled_files.items():
-            if label in categories and path != expected_path:
+            if label in categories and not paths_are_equivalent(path, expected_path):
                 continue
             reduced_files[path] = label
         return reduced_files

From b422abba29a9304225c97e79945cf0f1a21de810 Mon Sep 17 00:00:00 2001
From: Matej Tyc <matyc@redhat.com>
Date: Tue, 22 Nov 2022 15:44:13 +0100
Subject: [PATCH 3/3] Fix tests when relative content paths are enforced

---
 org_fedora_oscap/content_discovery.py    |  2 +-
 org_fedora_oscap/service/installation.py |  7 ++++++-
 tests/test_content_discovery.py          |  3 ++-
 tests/test_installation.py               |  2 +-
 tests/test_kickstart.py                  |  6 +++---
 tests/test_service_kickstart.py          | 19 +++++++++++++++----
 6 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/org_fedora_oscap/content_discovery.py b/org_fedora_oscap/content_discovery.py
index 9da44e7..61c4930 100644
--- a/org_fedora_oscap/content_discovery.py
+++ b/org_fedora_oscap/content_discovery.py
@@ -239,7 +239,7 @@ def reduce_files(self, labelled_files, expected_path, categories):
                 f"Expected a file {expected_path} to be part of the supplied content, "
                 f"but it was not the case, got only {list(labelled_files.keys())}"
             )
-            raise RuntimeError(msg)
+            raise content_handling.ContentHandlingError(msg)
         for path, label in labelled_files.items():
             if label in categories and not paths_are_equivalent(path, expected_path):
                 continue
diff --git a/org_fedora_oscap/service/installation.py b/org_fedora_oscap/service/installation.py
index f667479..5ca102c 100644
--- a/org_fedora_oscap/service/installation.py
+++ b/org_fedora_oscap/service/installation.py
@@ -23,10 +23,11 @@
 from pyanaconda.core import util
 from pyanaconda.modules.common.task import Task
 from pyanaconda.modules.common.errors.installation import NonCriticalInstallationError
+from pykickstart.errors import KickstartValueError
 
 from org_fedora_oscap import common, data_fetch, rule_handling, utils
 from org_fedora_oscap.common import _, get_packages_data, set_packages_data
-from org_fedora_oscap.content_handling import ContentCheckError
+from org_fedora_oscap.content_handling import ContentCheckError, ContentHandlingError
 from org_fedora_oscap import content_discovery
 
 log = logging.getLogger("anaconda")
@@ -48,6 +49,10 @@ def _handle_error(exception):
         msg = _("There was an error fetching and loading the security content:\n" +
                 f"{str(exception)}")
         terminate(msg)
+    elif isinstance(exception, ContentHandlingError):
+        msg = _("There was a problem with the supplied security content:\n" +
+                f"{str(exception)}")
+        terminate(msg)
 
     else:
         msg = _("There was an unexpected problem with the supplied content.")
diff --git a/tests/test_content_discovery.py b/tests/test_content_discovery.py
index d6e14d9..d664ede 100644
--- a/tests/test_content_discovery.py
+++ b/tests/test_content_discovery.py
@@ -3,6 +3,7 @@
 import pytest
 
 import org_fedora_oscap.content_discovery as tested_module
+from org_fedora_oscap import content_handling
 
 
 @pytest.fixture
@@ -43,7 +44,7 @@ def test_reduce(labelled_files):
     assert len(reduced) == len(labelled_files) - d_count - x_count + 1
     assert "dir/XCCDF" in reduced
 
-    with pytest.raises(RuntimeError, match="dir/datastream4"):
+    with pytest.raises(content_handling.ContentHandlingError, match="dir/datastream4"):
         bringer.reduce_files(labelled_files, "dir/datastream4", ["D"])
 
     reduced = bringer.reduce_files(labelled_files, "cpe", ["C"])
diff --git a/tests/test_installation.py b/tests/test_installation.py
index 302f5ed..2cf78db 100644
--- a/tests/test_installation.py
+++ b/tests/test_installation.py
@@ -76,7 +76,7 @@ def test_fetch_content_task(caplog, file_path, content_path):
 
     assert task.name == "Fetch the content, and optionally perform check or archive extraction"
 
-    with pytest.raises(NonCriticalInstallationError, match="Couldn't find a valid datastream"):
+    with pytest.raises(NonCriticalInstallationError, match="Expected a file"):
         task.run()
 
 
diff --git a/tests/test_kickstart.py b/tests/test_kickstart.py
index d4cfda2..60fe63d 100644
--- a/tests/test_kickstart.py
+++ b/tests/test_kickstart.py
@@ -163,7 +163,7 @@ def test_rpm(service):
         content-url = http://example.com/oscap_content.rpm
         content-type = RPM
         profile = Web Server
-        xccdf-path = /usr/share/oscap/xccdf.xml
+        xccdf-path = usr/share/oscap/xccdf.xml
     %end
     """
     check_ks_input(service, ks_in)
@@ -198,7 +198,7 @@ def test_rpm_with_wrong_suffix(service):
         content-url = http://example.com/oscap_content.xml
         content-type = RPM
         profile = Web Server
-        xccdf-path = /usr/share/oscap/xccdf.xml
+        xccdf-path = usr/share/oscap/xccdf.xml
     %end
     """
     check_ks_input(service, ks_in, errors=[