From 3543f8fb3e19b006e0c9953479d681c3d7722cd0 Mon Sep 17 00:00:00 2001
From: Haibo Lin <hlin@redhat.com>
Date: Tue, 25 Feb 2020 11:02:39 +0800
Subject: [PATCH] pkgset: Reuse pkgset repos

JIRA: COMPOSE-4158
Signed-off-by: Haibo Lin <hlin@redhat.com>
---
 pungi/paths.py                             |   8 +
 pungi/phases/pkgset/common.py              |  35 ++--
 pungi/phases/pkgset/pkgsets.py             | 125 ++++++++++++-
 pungi/phases/pkgset/sources/source_koji.py |  14 +-
 tests/test_pkgset_common.py                |   1 +
 tests/test_pkgset_pkgsets.py               | 197 +++++++++++++++++++++
 tests/test_pkgset_source_koji.py           |   3 +
 7 files changed, 365 insertions(+), 18 deletions(-)

diff --git a/pungi/paths.py b/pungi/paths.py
index bf2ced40..1531ad05 100644
--- a/pungi/paths.py
+++ b/pungi/paths.py
@@ -504,6 +504,14 @@ class WorkPaths(object):
         filename = "pkgset_%s_file_cache.pickle" % pkgset_name
         return os.path.join(self.topdir(arch="global"), filename)
 
+    def pkgset_reuse_file(self, pkgset_name):
+        """Path of the pickled reuse data for ``pkgset_name``.
+
+        Example: work/global/pkgset_f30-compose_reuse.pickle
+        """
+        global_dir = self.topdir(arch="global", create_dir=False)
+        return os.path.join(global_dir, "pkgset_%s_reuse.pickle" % pkgset_name)
+
 
 class ComposePaths(object):
     def __init__(self, compose):
diff --git a/pungi/phases/pkgset/common.py b/pungi/phases/pkgset/common.py
index ef474f54..ea712995 100644
--- a/pungi/phases/pkgset/common.py
+++ b/pungi/phases/pkgset/common.py
@@ -48,12 +48,6 @@ def get_create_global_repo_cmd(compose, path_prefix, repo_dir_global, pkgset):
     createrepo_checksum = compose.conf["createrepo_checksum"]
     repo = CreaterepoWrapper(createrepo_c=createrepo_c)
 
-    pkgset.save_file_list(
-        compose.paths.work.package_list(arch="global", pkgset=pkgset),
-        remove_path_prefix=path_prefix,
-    )
-    pkgset.save_file_cache(compose.paths.work.pkgset_file_cache(pkgset.name))
-
     # find an old compose suitable for repodata reuse
     update_md_path = None
     old_repo_dir = compose.paths.old_compose_path(
@@ -196,21 +190,32 @@ class MaterializedPackageSet(object):
             pkgset_global.name, arch="global"
         )
         paths = {"global": repo_dir_global}
-        cmd = get_create_global_repo_cmd(
-            compose, path_prefix, repo_dir_global, pkgset_global
+
+        pkgset_global.save_file_list(
+            compose.paths.work.package_list(arch="global", pkgset=pkgset_global),
+            remove_path_prefix=path_prefix,
         )
-        logfile = compose.paths.log.log_file(
-            "global", "arch_repo.%s" % pkgset_global.name
+        pkgset_global.save_file_cache(
+            compose.paths.work.pkgset_file_cache(pkgset_global.name)
         )
-        t = threading.Thread(
-            target=run_create_global_repo, args=(compose, cmd, logfile)
-        )
-        t.start()
+
+        if getattr(pkgset_global, "reuse", None) is None:
+            cmd = get_create_global_repo_cmd(
+                compose, path_prefix, repo_dir_global, pkgset_global
+            )
+            logfile = compose.paths.log.log_file(
+                "global", "arch_repo.%s" % pkgset_global.name
+            )
+            t = threading.Thread(
+                target=run_create_global_repo, args=(compose, cmd, logfile)
+            )
+            t.start()
 
         package_sets = populate_arch_pkgsets(compose, path_prefix, pkgset_global)
         package_sets["global"] = pkgset_global
 
-        t.join()
+        if getattr(pkgset_global, "reuse", None) is None:
+            t.join()
 
         create_arch_repos(compose, path_prefix, paths, pkgset_global, mmd)
 
diff --git a/pungi/phases/pkgset/pkgsets.py b/pungi/phases/pkgset/pkgsets.py
index 24d87653..674d828a 100644
--- a/pungi/phases/pkgset/pkgsets.py
+++ b/pungi/phases/pkgset/pkgsets.py
@@ -20,6 +20,7 @@ It automatically finds a signed copies according to *sigkey_ordering*.
 """
 
 import itertools
+import json
 import os
 from six.moves import cPickle as pickle
 
@@ -30,7 +31,7 @@ import kobo.rpmlib
 from kobo.threads import WorkerThread, ThreadPool
 
 import pungi.wrappers.kojiwrapper
-from pungi.util import pkg_is_srpm
+from pungi.util import pkg_is_srpm, copy_all
 from pungi.arch import get_valid_arches, is_excluded
 
 
@@ -370,6 +371,7 @@ class KojiPackageSet(PackageSetBase):
         self.populate_only_packages = populate_only_packages
         self.cache_region = cache_region
         self.extra_builds = extra_builds or []
+        self.reuse = None
 
     def __getstate__(self):
         result = self.__dict__.copy()
@@ -583,6 +585,147 @@ class KojiPackageSet(PackageSetBase):
         self.log_info("[DONE ] %s" % msg)
         return result
 
+    def write_reuse_file(self, compose, include_packages):
+        """Write data to files for reusing in future.
+
+        Failures are logged as a warning and otherwise ignored: a missing or
+        broken reuse file only means a later compose can not reuse this pkgset.
+
+        :param compose: compose object
+        :param include_packages: an iterable of tuples (package name, arch) that should
+                                 be included.
+        """
+        reuse_file = compose.paths.work.pkgset_reuse_file(self.name)
+        self.log_info("Writing pkgset reuse file: %s" % reuse_file)
+        try:
+            with open(reuse_file, "wb") as f:
+                pickle.dump(
+                    {
+                        "name": self.name,
+                        "allow_invalid_sigkeys": self._allow_invalid_sigkeys,
+                        "arches": self.arches,
+                        "sigkeys": self.sigkey_ordering,
+                        "packages": self.packages,
+                        "populate_only_packages": self.populate_only_packages,
+                        "rpms_by_arch": self.rpms_by_arch,
+                        "srpms_by_name": self.srpms_by_name,
+                        "extra_builds": self.extra_builds,
+                        "include_packages": include_packages,
+                    },
+                    f,
+                    protocol=pickle.HIGHEST_PROTOCOL,
+                )
+        except Exception as e:
+            self.log_warning("Writing pkgset reuse file failed: %s" % str(e))
+
+    def _get_koji_event_from_file(self, event_file):
+        """Return the value stored under "id" in the JSON file ``event_file``."""
+        with open(event_file, "r") as f:
+            return json.load(f)["id"]
+
+    def try_to_reuse(self, compose, tag, inherit=True, include_packages=None):
+        """Try to reuse pkgset data of old compose.
+
+        On success the old repo is copied into this compose, ``self.reuse``
+        is set to the old repo directory and ``rpms_by_arch`` and
+        ``srpms_by_name`` are taken from the old reuse file.
+
+        :param compose: compose object
+        :param str tag: koji tag name
+        :param inherit: whether to enable tag inheritance
+        :param include_packages: an iterable of tuples (package name, arch) that should
+                                 be included.
+        :returns: True if the old pkgset was reused, False otherwise
+        """
+        self.log_info("Trying to reuse pkgset data of old compose")
+        if not compose.paths.get_old_compose_topdir():
+            self.log_debug("No old compose found. Nothing to reuse.")
+            return False
+
+        event_file = os.path.join(
+            compose.paths.work.topdir(arch="global", create_dir=False), "koji-event"
+        )
+        old_event_file = compose.paths.old_compose_path(event_file)
+
+        try:
+            koji_event = self._get_koji_event_from_file(event_file)
+            old_koji_event = self._get_koji_event_from_file(old_event_file)
+        except Exception as e:
+            self.log_debug("Can't read koji event from file: %s" % str(e))
+            return False
+
+        if koji_event != old_koji_event:
+            self.log_debug(
+                "Koji event doesn't match, querying changes between event %d and %d"
+                % (old_koji_event, koji_event)
+            )
+            # Only changes between the two events matter. Order the bounds so
+            # that the window is not empty when the old compose used a newer
+            # event, and use the same window for the tag and its parents.
+            history_window = {
+                "afterEvent": min(koji_event, old_koji_event),
+                "beforeEvent": max(koji_event, old_koji_event) + 1,
+            }
+            changed = self.koji_proxy.queryHistory(
+                tables=["tag_listing"], tag=tag, **history_window
+            )
+            if changed["tag_listing"]:
+                self.log_debug("Builds under tag %s changed. Can't reuse." % tag)
+                return False
+
+            if inherit:
+                inherit_tags = self.koji_proxy.getFullInheritance(tag, koji_event)
+                for t in inherit_tags:
+                    changed = self.koji_proxy.queryHistory(
+                        tables=["tag_listing"], tag=t["name"], **history_window
+                    )
+                    if changed["tag_listing"]:
+                        self.log_debug(
+                            "Builds under inherited tag %s changed. Can't reuse."
+                            % t["name"]
+                        )
+                        return False
+
+        repo_dir = compose.paths.work.pkgset_repo(tag, create_dir=False)
+        old_repo_dir = compose.paths.old_compose_path(repo_dir)
+        if not old_repo_dir:
+            # Bail out before copy_all() is handed an empty source path.
+            # NOTE(review): assumes old_compose_path() gives a falsy value for
+            # paths missing from the old compose - confirm.
+            self.log_debug("Can't find old repo dir to reuse.")
+            return False
+
+        old_reuse_file = compose.paths.old_compose_path(
+            compose.paths.work.pkgset_reuse_file(tag)
+        )
+
+        try:
+            self.log_debug("Loading reuse file: %s" % old_reuse_file)
+            reuse_data = self.load_old_file_cache(old_reuse_file)
+        except Exception as e:
+            self.log_debug("Failed to load reuse file: %s" % str(e))
+            return False
+
+        if (
+            reuse_data["allow_invalid_sigkeys"] == self._allow_invalid_sigkeys
+            and reuse_data["packages"] == self.packages
+            and reuse_data["populate_only_packages"] == self.populate_only_packages
+            and reuse_data["extra_builds"] == self.extra_builds
+            and reuse_data["sigkeys"] == self.sigkey_ordering
+            and reuse_data["include_packages"] == include_packages
+        ):
+            self.log_info("Copying repo data for reuse: %s" % old_repo_dir)
+            copy_all(old_repo_dir, repo_dir)
+            self.reuse = old_repo_dir
+            self.rpms_by_arch = reuse_data["rpms_by_arch"]
+            self.srpms_by_name = reuse_data["srpms_by_name"]
+            if self.old_file_cache:
+                self.file_cache = self.old_file_cache
+            return True
+        else:
+            self.log_info("Criteria does not match. Nothing to reuse.")
+            return False
+
 
 def _is_src(rpm_info):
     """Check if rpm info object returned by Koji refers to source packages."""
diff --git a/pungi/phases/pkgset/sources/source_koji.py b/pungi/phases/pkgset/sources/source_koji.py
index 8d4dbd9e..c00c2ccf 100644
--- a/pungi/phases/pkgset/sources/source_koji.py
+++ b/pungi/phases/pkgset/sources/source_koji.py
@@ -696,12 +696,20 @@ def populate_global_pkgset(compose, koji_wrapper, path_prefix, event):
                         nevra = parse_nvra(rpm_nevra)
                         modular_packages.add((nevra["name"], nevra["arch"]))
 
-        pkgset.populate(
+        pkgset.try_to_reuse(
+            compose,
             compose_tag,
-            event,
             inherit=should_inherit,
             include_packages=modular_packages,
         )
+
+        if pkgset.reuse is None:
+            pkgset.populate(
+                compose_tag,
+                event,
+                inherit=should_inherit,
+                include_packages=modular_packages,
+            )
         for variant in compose.all_variants.values():
             if compose_tag in variant_tags[variant]:
 
@@ -721,6 +729,8 @@ def populate_global_pkgset(compose, koji_wrapper, path_prefix, event):
             ),
         )
 
+        pkgset.write_reuse_file(compose, include_packages=modular_packages)
+
     return pkgsets
 
 
diff --git a/tests/test_pkgset_common.py b/tests/test_pkgset_common.py
index bb0b5d11..df04dafa 100755
--- a/tests/test_pkgset_common.py
+++ b/tests/test_pkgset_common.py
@@ -45,6 +45,7 @@ class TestMaterializedPkgsetCreate(helpers.PungiTestCase):
     def _make_pkgset(self, name):
         pkgset = mock.Mock()
         pkgset.name = name
+        pkgset.reuse = None
 
         def mock_subset(primary, arch_list, exclusive_noarch):
             self.subsets[primary] = mock.Mock()
diff --git a/tests/test_pkgset_pkgsets.py b/tests/test_pkgset_pkgsets.py
index 478e0867..47164539 100644
--- a/tests/test_pkgset_pkgsets.py
+++ b/tests/test_pkgset_pkgsets.py
@@ -514,6 +514,214 @@ class TestKojiPkgset(PkgsetCompareMixin, helpers.PungiTestCase):
         )
 
 
+class TestReuseKojiPkgset(helpers.PungiTestCase):
+    """Tests for KojiPackageSet.try_to_reuse()."""
+
+    def setUp(self):
+        # Imported here so the cleanup below does not depend on a module-level
+        # "import shutil".
+        import shutil
+
+        super(TestReuseKojiPkgset, self).setUp()
+        self.old_compose_dir = tempfile.mkdtemp()
+        # mkdtemp() leaves removal to the caller.
+        self.addCleanup(shutil.rmtree, self.old_compose_dir, ignore_errors=True)
+        self.old_compose = helpers.DummyCompose(self.old_compose_dir, {})
+        self.compose = helpers.DummyCompose(
+            self.topdir, {"old_composes": os.path.dirname(self.old_compose_dir)}
+        )
+
+        self.koji_wrapper = mock.Mock()
+
+        self.tag = "test-tag"
+        self.inherited_tag = "inherited-test-tag"
+        self.pkgset = pkgsets.KojiPackageSet(
+            self.tag, self.koji_wrapper, [None], arches=["x86_64"]
+        )
+        self.pkgset.log_debug = mock.Mock()
+        self.pkgset.log_info = mock.Mock()
+
+    def assert_not_reuse(self):
+        """Assert that the pkgset was not marked as reused."""
+        self.assertIsNone(getattr(self.pkgset, "reuse", None))
+
+    def test_reuse_no_old_compose_found(self):
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+        self.pkgset.log_info.assert_called_once_with(
+            "Trying to reuse pkgset data of old compose"
+        )
+        self.pkgset.log_debug.assert_called_once_with(
+            "No old compose found. Nothing to reuse."
+        )
+        self.assert_not_reuse()
+
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_read_koji_event_file_failed(self, mock_old_topdir):
+        mock_old_topdir.return_value = self.old_compose_dir
+        self.pkgset._get_koji_event_from_file = mock.Mock(
+            side_effect=Exception("unknown error")
+        )
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+        self.pkgset.log_debug.assert_called_once_with(
+            "Can't read koji event from file: unknown error"
+        )
+        self.assert_not_reuse()
+
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_build_under_tag_changed(self, mock_old_topdir):
+        mock_old_topdir.return_value = self.old_compose_dir
+        # The current compose's event is read first (3), then the old one (1).
+        self.pkgset._get_koji_event_from_file = mock.Mock(side_effect=[3, 1])
+        self.koji_wrapper.koji_proxy.queryHistory.return_value = {"tag_listing": [{}]}
+
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+
+        self.assertEqual(
+            self.pkgset.log_debug.mock_calls,
+            [
+                mock.call(
+                    "Koji event doesn't match, querying changes between event 1 and 3"
+                ),
+                mock.call("Builds under tag %s changed. Can't reuse." % self.tag),
+            ],
+        )
+        self.assert_not_reuse()
+
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_build_under_inherited_tag_changed(self, mock_old_topdir):
+        mock_old_topdir.return_value = self.old_compose_dir
+        self.pkgset._get_koji_event_from_file = mock.Mock(side_effect=[3, 1])
+        # First query (the tag itself): no change. Second (inherited tag): changed.
+        self.koji_wrapper.koji_proxy.queryHistory.side_effect = [
+            {"tag_listing": []},
+            {"tag_listing": [{}]},
+        ]
+        self.koji_wrapper.koji_proxy.getFullInheritance.return_value = [
+            {"name": self.inherited_tag}
+        ]
+
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+
+        self.assertEqual(
+            self.pkgset.log_debug.mock_calls,
+            [
+                mock.call(
+                    "Koji event doesn't match, querying changes between event 1 and 3"
+                ),
+                mock.call(
+                    "Builds under inherited tag %s changed. Can't reuse."
+                    % self.inherited_tag
+                ),
+            ],
+        )
+        self.assert_not_reuse()
+
+    @mock.patch("pungi.paths.os.path.exists", return_value=True)
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_failed_load_reuse_file(self, mock_old_topdir, mock_exists):
+        mock_old_topdir.return_value = self.old_compose_dir
+        self.pkgset._get_koji_event_from_file = mock.Mock(side_effect=[3, 1])
+        self.koji_wrapper.koji_proxy.queryHistory.return_value = {"tag_listing": []}
+        self.koji_wrapper.koji_proxy.getFullInheritance.return_value = []
+        self.pkgset.load_old_file_cache = mock.Mock(
+            side_effect=Exception("unknown error")
+        )
+
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+
+        self.assertEqual(
+            self.pkgset.log_debug.mock_calls,
+            [
+                mock.call(
+                    "Koji event doesn't match, querying changes between event 1 and 3"
+                ),
+                mock.call(
+                    "Loading reuse file: %s"
+                    % os.path.join(
+                        self.old_compose_dir,
+                        "work/global",
+                        "pkgset_%s_reuse.pickle" % self.tag,
+                    )
+                ),
+                mock.call("Failed to load reuse file: unknown error"),
+            ],
+        )
+        self.assert_not_reuse()
+
+    @mock.patch("pungi.paths.os.path.exists", return_value=True)
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_criteria_not_match(self, mock_old_topdir, mock_exists):
+        mock_old_topdir.return_value = self.old_compose_dir
+        self.pkgset._get_koji_event_from_file = mock.Mock(side_effect=[3, 1])
+        self.koji_wrapper.koji_proxy.queryHistory.return_value = {"tag_listing": []}
+        self.koji_wrapper.koji_proxy.getFullInheritance.return_value = []
+        self.pkgset.load_old_file_cache = mock.Mock(
+            return_value={"allow_invalid_sigkeys": True}
+        )
+
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+
+        self.assertEqual(
+            self.pkgset.log_debug.mock_calls,
+            [
+                mock.call(
+                    "Koji event doesn't match, querying changes between event 1 and 3"
+                ),
+                mock.call(
+                    "Loading reuse file: %s"
+                    % os.path.join(
+                        self.old_compose_dir,
+                        "work/global",
+                        "pkgset_%s_reuse.pickle" % self.tag,
+                    )
+                ),
+            ],
+        )
+        self.assertEqual(
+            self.pkgset.log_info.mock_calls,
+            [
+                mock.call("Trying to reuse pkgset data of old compose"),
+                mock.call("Criteria does not match. Nothing to reuse."),
+            ],
+        )
+        self.assert_not_reuse()
+
+    @mock.patch("pungi.phases.pkgset.pkgsets.copy_all")
+    @mock.patch("pungi.paths.os.path.exists", return_value=True)
+    @mock.patch.object(helpers.paths.Paths, "get_old_compose_topdir")
+    def test_reuse_pkgset(self, mock_old_topdir, mock_exists, mock_copy_all):
+        mock_old_topdir.return_value = self.old_compose_dir
+        self.pkgset._get_koji_event_from_file = mock.Mock(side_effect=[3, 1])
+        self.koji_wrapper.koji_proxy.queryHistory.return_value = {"tag_listing": []}
+        self.koji_wrapper.koji_proxy.getFullInheritance.return_value = []
+        self.pkgset.load_old_file_cache = mock.Mock(
+            return_value={
+                "allow_invalid_sigkeys": self.pkgset._allow_invalid_sigkeys,
+                "packages": self.pkgset.packages,
+                "populate_only_packages": self.pkgset.populate_only_packages,
+                "extra_builds": self.pkgset.extra_builds,
+                "sigkeys": self.pkgset.sigkey_ordering,
+                "include_packages": None,
+                "rpms_by_arch": mock.Mock(),
+                "srpms_by_name": mock.Mock(),
+            }
+        )
+        self.pkgset.old_file_cache = mock.Mock()
+
+        self.pkgset.try_to_reuse(self.compose, self.tag)
+
+        old_repo_dir = os.path.join(self.old_compose_dir, "work/global/repo", self.tag)
+        self.assertEqual(
+            self.pkgset.log_info.mock_calls,
+            [
+                mock.call("Trying to reuse pkgset data of old compose"),
+                mock.call("Copying repo data for reuse: %s" % old_repo_dir),
+            ],
+        )
+        self.assertEqual(old_repo_dir, self.pkgset.reuse)
+        self.assertEqual(self.pkgset.file_cache, self.pkgset.old_file_cache)
+
+
 @mock.patch("kobo.pkgset.FileCache", new=MockFileCache)
 class TestMergePackageSets(PkgsetCompareMixin, unittest.TestCase):
     def test_merge_in_another_arch(self):
diff --git a/tests/test_pkgset_source_koji.py b/tests/test_pkgset_source_koji.py
index 6d547024..8e3ef136 100644
--- a/tests/test_pkgset_source_koji.py
+++ b/tests/test_pkgset_source_koji.py
@@ -86,6 +86,7 @@ class TestPopulateGlobalPkgset(helpers.PungiTestCase):
     def test_populate(self, KojiPackageSet, materialize):
         materialize.side_effect = self.mock_materialize
 
+        KojiPackageSet.return_value.reuse = None
         orig_pkgset = KojiPackageSet.return_value
 
         pkgsets = source_koji.populate_global_pkgset(
@@ -113,6 +114,8 @@ class TestPopulateGlobalPkgset(helpers.PungiTestCase):
 
         materialize.side_effect = self.mock_materialize
 
+        KojiPackageSet.return_value.reuse = None
+
         pkgsets = source_koji.populate_global_pkgset(
             self.compose, self.koji_wrapper, "/prefix", 123456
         )