From 1afd0fb1a0ed7354e7ed525bf0de3b883eddff8e Mon Sep 17 00:00:00 2001
From: Petr Stodulka
Date: Thu, 19 Oct 2023 18:44:06 +0200
Subject: [PATCH 57/60] Introduce TrackedFilesInfoSource message and new actor

We have already hit several times a situation where an actor needed
information about a specific file (whether it exists, has been
changed, ...). And for that purpose an extra scanner actor needed to be
created, with an associated message and Model.

To cover such cases, we are introducing the new model
TrackedFilesInfoSource and the actor scansourcefiles.

So in future, when any actor needs such a piece of information and does
something based on it, the developer can just update the lists in the
introduced actor's library, so the information about the particular
file will be provided.

Another benefit is saving time on writing new unit tests and code for
the scan, as updating a list of files to be tracked does not affect the
algorithm.
---
 .../common/actors/scansourcefiles/actor.py    | 32 ++++++++
 .../libraries/scansourcefiles.py              | 79 +++++++++++++++++++
 .../tests/unit_test_scansourcefiles.py        |  5 ++
 .../common/models/trackedfiles.py             | 60 ++++++++++++++
 4 files changed, 176 insertions(+)
 create mode 100644 repos/system_upgrade/common/actors/scansourcefiles/actor.py
 create mode 100644 repos/system_upgrade/common/actors/scansourcefiles/libraries/scansourcefiles.py
 create mode 100644 repos/system_upgrade/common/actors/scansourcefiles/tests/unit_test_scansourcefiles.py
 create mode 100644 repos/system_upgrade/common/models/trackedfiles.py

diff --git a/repos/system_upgrade/common/actors/scansourcefiles/actor.py b/repos/system_upgrade/common/actors/scansourcefiles/actor.py
new file mode 100644
index 00000000..b368fc88
--- /dev/null
+++ b/repos/system_upgrade/common/actors/scansourcefiles/actor.py
@@ -0,0 +1,32 @@
+from leapp.actors import Actor
+from leapp.libraries.actor import scansourcefiles
+from leapp.models import TrackedFilesInfoSource
+from leapp.tags import FactsPhaseTag, IPUWorkflowTag
+
+
+class ScanSourceFiles(Actor):
+    """
+    Scan files (explicitly specified) of the source system.
+
+    If an actor requires information about a file, like whether it's installed,
+    modified, etc., the file can be added to the list of tracked files, so no
+    extra actor has to be created to provide just that one piece of information.
+
+    The scan of all changed files tracked by RPMs is very expensive. So we rather
+    provide this possibility to simplify the work for others.
+
+    See lists defined in the private library.
+    """
+    # TODO(pstodulk): in some cases could be valuable to specify an rpm name
+    # and provide information about all changed files instead. Both approaches
+    # have a little bit different use-cases and expectations. In the second
+    # case it would be good solution regarding track of leapp-repository
+    # changed files.
+
+    name = 'scan_source_files'
+    consumes = ()
+    produces = (TrackedFilesInfoSource,)
+    tags = (IPUWorkflowTag, FactsPhaseTag)
+
+    def process(self):
+        scansourcefiles.process()
diff --git a/repos/system_upgrade/common/actors/scansourcefiles/libraries/scansourcefiles.py b/repos/system_upgrade/common/actors/scansourcefiles/libraries/scansourcefiles.py
new file mode 100644
index 00000000..33e0275f
--- /dev/null
+++ b/repos/system_upgrade/common/actors/scansourcefiles/libraries/scansourcefiles.py
@@ -0,0 +1,114 @@
+import os
+
+from leapp.libraries.common.config.version import get_source_major_version
+from leapp.libraries.stdlib import api, CalledProcessError, run
+from leapp.models import FileInfo, TrackedFilesInfoSource
+
+# TODO(pstodulk): make linter happy about this
+# common -> Files supposed to be scanned on all system versions.
+# '8' (etc..) -> files supposed to be scanned when particular major version of OS is used
+TRACKED_FILES = {
+    'common': [
+    ],
+    '8': [
+    ],
+    '9': [
+    ],
+}
+
+# TODO(pstodulk)?: introduce possibility to discover files under a dir that
+# are not tracked by any rpm or a specified rpm? Currently I have only one
+# use case for that in my head, so possibly it will be better to skip a generic
+# solution and just introduce a new actor and msg for that (check whether
+# actors not owned by our package(s) are present).
+
+
+def _get_rpm_name(input_file):
+    """
+    Return the name of the rpm that owns the input_file.
+
+    Return an empty string when the file is not owned by any rpm.
+    """
+    try:
+        rpm_names = run(['rpm', '-qf', '--queryformat', r'%{NAME}\n', input_file], split=True)['stdout']
+    except CalledProcessError:
+        # is not owned by any rpm
+        return ''
+
+    if not rpm_names:
+        # seatbelt: the query succeeded but printed nothing
+        return ''
+
+    if len(rpm_names) > 1:
+        # this is very seatbelt; could happen for directories, but we do
+        # not expect here directories specified at all. if so, we should
+        # provide list instead of string
+        api.current_logger().warning(
+            'The {} file is owned by multiple rpms: {}.'
+            .format(input_file, ', '.join(rpm_names))
+        )
+    return rpm_names[0]
+
+
+def is_modified(input_file):
+    """
+    Return True if checksum has been changed (or removed).
+
+    Ignores mode, user, type, ...
+
+    The `rpm -Vf` call verifies all files of the rpm owning the input_file,
+    so the output can contain records about other files too. Only records
+    that belong to the input_file are evaluated. Paths are compared as they
+    are, so the input_file is expected to be the path tracked by the rpm.
+    """
+    result = run(['rpm', '-Vf', '--nomtime', input_file], checked=False)
+    if not result['exit_code']:
+        return False
+
+    for line in result['stdout'].splitlines():
+        # record format: <status> [<attribute marker>] <path>
+        status, _dummy, rest = line.strip().partition(' ')
+        path_start = rest.find('/')
+        if path_start == -1 or rest[path_start:] != input_file:
+            continue
+        if status == 'missing' or '5' in status:
+            return True
+    # nothing wrong reported about the input_file itself
+    return False
+
+
+def scan_file(input_file):
+    """
+    Return FileInfo with data collected about the input_file.
+    """
+    data = {
+        'path': input_file,
+        'exists': os.path.exists(input_file),
+        'rpm_name': _get_rpm_name(input_file),
+    }
+
+    if data['rpm_name']:
+        data['is_modified'] = is_modified(input_file)
+    else:
+        # it's not tracked by any rpm at all, so always False
+        data['is_modified'] = False
+
+    return FileInfo(**data)
+
+
+def scan_files(files):
+    """
+    Return the list of FileInfo, one item for each of the given files.
+    """
+    return [scan_file(fname) for fname in files]
+
+
+def process():
+    """
+    Scan the tracked files and produce the TrackedFilesInfoSource message.
+
+    Files tracked for all systems are scanned together with files tracked
+    for the major version of the source system.
+    """
+    files = scan_files(TRACKED_FILES['common'] + TRACKED_FILES.get(get_source_major_version(), []))
+    api.produce(TrackedFilesInfoSource(files=files))
diff --git a/repos/system_upgrade/common/actors/scansourcefiles/tests/unit_test_scansourcefiles.py b/repos/system_upgrade/common/actors/scansourcefiles/tests/unit_test_scansourcefiles.py
new file mode 100644
index 00000000..6a6b009a
--- /dev/null
+++ b/repos/system_upgrade/common/actors/scansourcefiles/tests/unit_test_scansourcefiles.py
@@ -0,0 +1,5 @@
+def test_scansourcefiles():
+    # TODO(pstodulk): keeping unit tests for later after I check the idea
+    # of this actor with the team.
+    # JIRA: OAMG-10367
+    pass
diff --git a/repos/system_upgrade/common/models/trackedfiles.py b/repos/system_upgrade/common/models/trackedfiles.py
new file mode 100644
index 00000000..f7c2c809
--- /dev/null
+++ b/repos/system_upgrade/common/models/trackedfiles.py
@@ -0,0 +1,60 @@
+from leapp.models import fields, Model
+from leapp.topics import SystemInfoTopic
+
+
+class FileInfo(Model):
+    """
+    Various data about a file.
+
+    This model is not supposed to be used as a message directly.
+    See e.g. :class:`TrackedFilesInfoSource` instead.
+    """
+    topic = SystemInfoTopic
+
+    path = fields.String()
+    """
+    Canonical path to the file.
+    """
+
+    exists = fields.Boolean()
+    """
+    True if the file is present on the system.
+    """
+
+    rpm_name = fields.String(default="")
+    """
+    Name of the rpm that owns the file. Otherwise empty string if not owned
+    by any rpm.
+    """
+
+    # NOTE(pstodulk): I have been thinking about the "state"/"modified" field
+    # instead. Which could contain enum list, where could be specified what has
+    # been changed (checksum, type, owner, ...). But currently we do not have
+    # use cases for that and do not want to implement it now. So starting simply
+    # with this one.
+    is_modified = fields.Boolean()
+    """
+    True if the checksum of the file has been changed (includes the missing state).
+
+    The field is meaningful only for a file tracked by rpm - excluding ghost
+    files. For any other file the value is always False.
+    """
+
+
+class TrackedFilesInfoSource(Model):
+    """
+    Provide information about files on the source system explicitly defined
+    in the actor to be tracked.
+
+    Search for the actor producing this message to discover the list where
+    you could add a file to be tracked.
+
+    This particular message is expected to be produced only once by the
+    specific actor. Do not produce multiple messages of this model.
+    """
+    topic = SystemInfoTopic
+
+    files = fields.List(fields.Model(FileInfo), default=[])
+    """
+    List of :class:`FileInfo`.
+    """
-- 
2.43.0