From ee1ee0467b3b7bbd01b6e491d2b1d30f0b8f6f7c Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 May 2016 09:40:20 -0400 Subject: [PATCH] extra-files: Write a metadata file enumerating extra files Introduces a new metadata file to track arbitrary files added during the extra-files phase. This file is placed in the root of each tree and is called ``extra_files.json``. It is a JSON file containing a single object, which contains a "header" key with an object describing the metadata, and a "data" key, which is an array of objects, where each object represents a file. Each object contains the "file", "checksums", and "size" keys. "file" is the relative path from the tree root to the extra file. "checksums" is an object containing one or more checksums, where the key is the digest type and the value of that key is the hex digest. Finally, the size is the size of the file in bytes. For example: { "header": {"version": "1.0"}, "data": [ { "file": "GPL", "checksums": { "sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643" }, "size": 18092 }, { "file": "release-notes/notes.html", "checksums": { "sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb" }, "size": 1120 } ] } Signed-off-by: Jeremy Cline Fixes: #295 --- AUTHORS | 1 + doc/configuration.rst | 30 ++++++++++++++- pungi/metadata.py | 45 ++++++++++++++++++++++ pungi/phases/extra_files.py | 11 +++--- pungi/util.py | 39 ++++++++++++++++++- pungi/wrappers/scm.py | 66 ++++++++++++++++++++++++++++++++- tests/test_extra_files_phase.py | 2 +- tests/test_metadata.py | 55 ++++++++++++++++++++++++++- tests/test_util.py | 27 ++++++++++++++ 9 files changed, 265 insertions(+), 11 deletions(-) diff --git a/AUTHORS b/AUTHORS index 58869fc0..d4d1ca99 100644 --- a/AUTHORS +++ b/AUTHORS @@ -13,3 +13,4 @@ Tom Callaway Joel Andres Granados Mark McLoughlin +Jeremy Cline diff --git a/doc/configuration.rst b/doc/configuration.rst index ab848b54..3f4b9d53 100644 --- a/doc/configuration.rst +++ 
b/doc/configuration.rst @@ -703,6 +703,34 @@ Example ] +Extra Files Metadata +-------------------- +If extra files are specified, a metadata file, ``extra_files.json``, is placed +in the os/ directory and media. This metadata file is in the format: + +:: + + { + "header": {"version": "1.0"}, + "data": [ + { + "file": "GPL", + "checksums": { + "sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643" + }, + "size": 18092 + }, + { + "file": "release-notes/notes.html", + "checksums": { + "sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb" + }, + "size": 1120 + } + ] + } + + Productimg Settings =================== Product images are placed on installation media and provide additional branding @@ -1240,7 +1268,7 @@ Translate Paths Settings This feature becomes useful when you need to transform compose location into e.g. a HTTP repo which is can be passed to ``koji image-build``. The ``path`` part is normalized via ``os.path.normpath()``. - + Example config -------------- diff --git a/pungi/metadata.py b/pungi/metadata.py index ec70e7e6..9b4d34e2 100644 --- a/pungi/metadata.py +++ b/pungi/metadata.py @@ -17,6 +17,7 @@ import os import time +import json import productmd.composeinfo import productmd.treeinfo @@ -25,6 +26,7 @@ from kobo.shortcuts import relative_path from pungi.compose_metadata.discinfo import write_discinfo as create_discinfo from pungi.compose_metadata.discinfo import write_media_repo as create_media_repo +from pungi import util as pungi_util def get_description(compose, variant, arch): @@ -317,3 +319,46 @@ def write_tree_info(compose, arch, variant, timestamp=None): path = os.path.join(compose.paths.compose.os_tree(arch=arch, variant=variant), ".treeinfo") compose.log_info("Writing treeinfo: %s" % path) ti.dump(path) + + +def write_extra_files(tree_path, files, checksum_type='sha256', logger=None): + """ + Write the metadata for all extra files added to the compose. 
+ + :param tree_path: + Root of the tree to write the ``extra_files.json`` metadata file for. + + :param files: + A list of files that should be included in the metadata file. These + should be paths that are relative to ``tree_path``. + + :return: + Path to the metadata file written. + """ + metadata_path = os.path.join(tree_path, 'extra_files.json') + if logger: + logger.info('Calculating content of {metadata}'.format(metadata=metadata_path)) + metadata = {'header': {'version': '1.0'}, 'data': []} + for f in files: + if logger: + logger.debug('Processing {file}'.format(file=f)) + path = os.path.join(tree_path, f) + checksum = pungi_util._doCheckSum(path, checksum_type, logger) + # _doCheckSum returns in the format <type>:<digest> _or_ False for failure + if checksum is False: + err = 'Failed to calculate the checksum for {file}.'.format(file=path) + raise RuntimeError(err) + checksum = checksum.split(':')[1] + entry = { + 'file': f, + 'checksums': {checksum_type: checksum}, + 'size': os.path.getsize(path), + } + metadata['data'].append(entry) + + if logger: + logger.info('Writing {metadata}'.format(metadata=metadata_path)) + + with open(metadata_path, 'w') as fd: + json.dump(metadata, fd, sort_keys=True, indent=4, separators=(',', ': ')) + return metadata_path diff --git a/pungi/phases/extra_files.py b/pungi/phases/extra_files.py index 2ab5de79..339d9e69 100644 --- a/pungi/phases/extra_files.py +++ b/pungi/phases/extra_files.py @@ -21,6 +21,7 @@ import fnmatch from pungi.util import get_arch_variant_data, pkg_is_rpm, copy_all from pungi.arch import split_name_arch +from pungi import metadata from pungi.wrappers.scm import get_file_from_scm, get_dir_from_scm from pungi.phases.base import ConfigGuardedPhase @@ -45,7 +46,7 @@ class ExtraFilesPhase(ConfigGuardedPhase): % (arch, variant.uid)) -def copy_extra_files(compose, cfg, arch, variant, package_sets): +def copy_extra_files(compose, cfg, arch, variant, package_sets, checksum_type='sha256'): var_dict = { "arch": arch, 
"variant_id": variant.id, @@ -76,12 +77,12 @@ def copy_extra_files(compose, cfg, arch, variant, package_sets): scm_dict["repo"] = rpms getter = get_file_from_scm if 'file' in scm_dict else get_dir_from_scm - getter(scm_dict, - os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')), - logger=compose._logger) + target_path = os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')) + getter(scm_dict, target_path, logger=compose._logger) if os.listdir(extra_files_dir): - copy_all(extra_files_dir, os_tree) + files_copied = copy_all(extra_files_dir, os_tree) + metadata.write_extra_files(os_tree, files_copied, checksum_type, compose._logger) compose.log_info("[DONE ] %s" % msg) diff --git a/pungi/util.py b/pungi/util.py index b8009b6a..f76af908 100644 --- a/pungi/util.py +++ b/pungi/util.py @@ -506,7 +506,24 @@ def get_format_substs(compose, **kwargs): def copy_all(src, dest): - """This function is equivalent to running `cp src/* dest`.""" + """ + Copy all files and directories within ``src`` to the ``dest`` directory. + + This is equivalent to running ``cp -r src/* dest``. + + :param src: + Source directory to copy from. + + :param dest: + Destination directory to copy to. + + :return: + A list of relative paths to the files copied. + + Example: + >>> _copy_all('/tmp/src/', '/tmp/dest/') + ['file1', 'dir1/file2', 'dir1/subdir/file3'] + """ contents = os.listdir(src) if not contents: raise RuntimeError('Source directory %s is empty.' % src) @@ -519,6 +536,26 @@ def copy_all(src, dest): else: shutil.copy2(source, destination) + return recursive_file_list(src) + + +def recursive_file_list(directory): + """Return a list of files contained in ``directory``. + + The files are paths relative to ``directory`` + + :param directory: + Path to the directory to list. 
+ + Example: + >>> recursive_file_list('/some/dir') + ['file1', 'subdir/file2'] + """ + file_list = [] + for root, dirs, files in os.walk(directory): + file_list += [os.path.relpath(os.path.join(root, f), directory) for f in files] + return file_list + def levenshtein(a, b): """Compute Levenshtein edit distance between two strings.""" diff --git a/pungi/wrappers/scm.py b/pungi/wrappers/scm.py index f601b369..3b8459e5 100644 --- a/pungi/wrappers/scm.py +++ b/pungi/wrappers/scm.py @@ -212,6 +212,36 @@ def _get_wrapper(scm_type, *args, **kwargs): def get_file_from_scm(scm_dict, target_path, logger=None): + """ + Copy one or more files from source control to a target path. A list of files + created in ``target_path`` is returned. + + :param scm_dict: + A dictionary describing the source control repository; this can + optionally be a path to a directory on the local filesystem or reference + an RPM. Supported keys for the dictionary are ``scm``, ``repo``, + ``file``, and ``branch``. ``scm`` is the type of version control system + used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository + (or, if 'rpm' is the ``scm``, the package name), ``file`` is either a + path or list of paths to copy, and ``branch`` is the branch to check + out, if any. + + :param target_path: + The destination path for the files being copied. + + :param logger: + The logger to use for any logging performed. 
+ + Example: + >>> scm_dict = { + >>> 'scm': 'git', + >>> 'repo': 'https://pagure.io/pungi.git', + >>> 'file': ['share/variants.dtd'], + >>> } + >>> target_path = '/tmp/path/' + >>> get_file_from_scm(scm_dict, target_path) + ['/tmp/path/share/variants.dtd'] + """ if isinstance(scm_dict, str): scm_type = "file" scm_repo = None @@ -225,14 +255,45 @@ def get_file_from_scm(scm_dict, target_path, logger=None): scm = _get_wrapper(scm_type, logger=logger) + files_copied = [] for i in force_list(scm_file): tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") scm.export_file(scm_repo, i, scm_branch=scm_branch, target_dir=tmp_dir) - copy_all(tmp_dir, target_path) + files_copied += copy_all(tmp_dir, target_path) shutil.rmtree(tmp_dir) + return files_copied def get_dir_from_scm(scm_dict, target_path, logger=None): + """ + Copy a directory from source control to a target path. A list of files + created in ``target_path`` is returned. + + :param scm_dict: + A dictionary describing the source control repository; this can + optionally be a path to a directory on the local filesystem or reference + an RPM. Supported keys for the dictionary are ``scm``, ``repo``, + ``dir``, and ``branch``. ``scm`` is the type of version control system + used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository + (or, if 'rpm' is the ``scm``, the package name), ``dir`` is the + directory to copy, and ``branch`` is the branch to check out, if any. + + :param target_path: + The destination path for the directory being copied. + + :param logger: + The logger to use for any logging performed. + + Example: + >>> scm_dict = { + >>> 'scm': 'git', + >>> 'repo': 'https://pagure.io/pungi.git', + >>> 'dir': 'share', + >>> } + >>> target_path = '/tmp/path/' + >>> get_dir_from_scm(scm_dict, target_path) + ['/tmp/path/share/variants.dtd', '/tmp/path/share/rawhide-fedora.ks', ...] 
+ """ if isinstance(scm_dict, str): scm_type = "file" scm_repo = None @@ -248,5 +309,6 @@ def get_dir_from_scm(scm_dict, target_path, logger=None): tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") scm.export_dir(scm_repo, scm_dir, scm_branch=scm_branch, target_dir=tmp_dir) - copy_all(tmp_dir, target_path) + files_copied = copy_all(tmp_dir, target_path) shutil.rmtree(tmp_dir) + return files_copied diff --git a/tests/test_extra_files_phase.py b/tests/test_extra_files_phase.py index e29c57e8..94e443c8 100755 --- a/tests/test_extra_files_phase.py +++ b/tests/test_extra_files_phase.py @@ -81,7 +81,6 @@ class TestCopyFiles(helpers.PungiTestCase): compose = helpers.DummyCompose(self.topdir, {}) cfg = {'scm': 'file', 'dir': os.path.join(self.topdir, 'src'), 'repo': None, 'target': 'subdir'} - extra_files.copy_extra_files(compose, [cfg], 'x86_64', compose.variants['Server'], mock.Mock()) @@ -147,6 +146,7 @@ class TestCopyFiles(helpers.PungiTestCase): def fake_get_file(self, scm_dict, dest, logger): self.scm_dict = scm_dict helpers.touch(os.path.join(dest, scm_dict['file'])) + return [scm_dict['file']] if __name__ == "__main__": diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 8ae07d67..2b4be550 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- - +import json import mock import unittest import os @@ -159,5 +159,58 @@ class MediaRepoTestCase(helpers.PungiTestCase): self.assertFalse(os.path.isfile(self.path)) +class TestWriteExtraFiles(helpers.PungiTestCase): + + def setUp(self): + super(TestWriteExtraFiles, self).setUp() + self.compose = helpers.DummyCompose(self.topdir, {}) + + def test_write_extra_files(self): + """Assert metadata is written to the proper location with valid data""" + mock_logger = mock.Mock() + files = ['file1', 'file2', 'subdir/file3'] + expected_metadata = { + u'header': {u'version': u'1.0'}, + u'data': [ + { + u'file': u'file1', + u'checksums': {u'sha256': 
u'ecdc5536f73bdae8816f0ea40726ef5e9b810d914493075903bb90623d97b1d8'}, + u'size': 6, + }, + { + u'file': u'file2', + u'checksums': {u'sha256': u'67ee5478eaadb034ba59944eb977797b49ca6aa8d3574587f36ebcbeeb65f70e'}, + u'size': 6, + }, + { + u'file': u'subdir/file3', + u'checksums': {u'sha256': u'52f9f0e467e33da811330cad085fdb4eaa7abcb9ebfe6001e0f5910da678be51'}, + u'size': 13, + }, + ] + } + tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os') + for f in files: + helpers.touch(os.path.join(tree_dir, f), f + '\n') + + metadata_file = metadata.write_extra_files(tree_dir, files, logger=mock_logger) + with open(metadata_file) as metadata_fd: + actual_metadata = json.load(metadata_fd) + + self.assertEqual(expected_metadata['header'], actual_metadata['header']) + self.assertEqual(expected_metadata['data'], actual_metadata['data']) + + def test_write_extra_files_missing_file(self): + """Assert a RuntimeError is raised when a listed file is missing""" + mock_logger = mock.Mock() + files = ['file1', 'file2', 'subdir/file3'] + tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os') + for f in files: + helpers.touch(os.path.join(tree_dir, f), f + '\n') + files.append('missing_file') + + self.assertRaises(RuntimeError, metadata.write_extra_files, tree_dir, files, 'sha256', mock_logger) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_util.py b/tests/test_util.py index 77ca063b..8e21cbc0 100755 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -379,5 +379,32 @@ class TestLevenshtein(unittest.TestCase): self.assertEqual(util.levenshtein('kitten', 'sitting'), 3) +class TestRecursiveFileList(unittest.TestCase): + + def setUp(self): + self.tmp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + + def test_flat_file_list(self): + """Build a directory containing files and assert they are listed.""" + expected_files = sorted(['file1', 'file2', 'file3']) + for expected_file in 
[os.path.join(self.tmp_dir, f) for f in expected_files]: + touch(expected_file) + + actual_files = sorted(util.recursive_file_list(self.tmp_dir)) + self.assertEqual(expected_files, actual_files) + + def test_nested_file_list(self): + """Build a directory containing files and assert they are listed.""" + expected_files = sorted(['file1', 'subdir/file2', 'sub/subdir/file3']) + for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]: + touch(expected_file) + + actual_files = sorted(util.recursive_file_list(self.tmp_dir)) + self.assertEqual(expected_files, actual_files) + + if __name__ == "__main__": unittest.main()