diff --git a/AUTHORS b/AUTHORS index 58869fc0..d4d1ca99 100644 --- a/AUTHORS +++ b/AUTHORS @@ -13,3 +13,4 @@ Tom Callaway Joel Andres Granados Mark McLoughlin +Jeremy Cline diff --git a/doc/configuration.rst b/doc/configuration.rst index ab848b54..3f4b9d53 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -703,6 +703,34 @@ Example ] +Extra Files Metadata +-------------------- +If extra files are specified a metadata file, ``extra_files.json``, is placed +in the os/ directory and media. This metadata file is in the format: + +:: + + { + "header": {"version": "1.0}, + "data": [ + { + "file": "GPL", + "checksums": { + "sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643" + }, + "size": 18092 + }, + { + "file": "release-notes/notes.html", + "checksums": { + "sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb" + }, + "size": 1120 + } + ] + } + + Productimg Settings =================== Product images are placed on installation media and provide additional branding @@ -1240,7 +1268,7 @@ Translate Paths Settings This feature becomes useful when you need to transform compose location into e.g. a HTTP repo which is can be passed to ``koji image-build``. The ``path`` part is normalized via ``os.path.normpath()``. - + Example config -------------- diff --git a/pungi/metadata.py b/pungi/metadata.py index ec70e7e6..9b4d34e2 100644 --- a/pungi/metadata.py +++ b/pungi/metadata.py @@ -17,6 +17,7 @@ import os import time +import json import productmd.composeinfo import productmd.treeinfo @@ -25,6 +26,7 @@ from kobo.shortcuts import relative_path from pungi.compose_metadata.discinfo import write_discinfo as create_discinfo from pungi.compose_metadata.discinfo import write_media_repo as create_media_repo +from pungi import util as pungi_util def get_description(compose, variant, arch): @@ -317,3 +319,46 @@ def write_tree_info(compose, arch, variant, timestamp=None): path = os.path.join(compose.paths.compose.os_tree(arch=arch, variant=variant), ".treeinfo") compose.log_info("Writing treeinfo: %s" % path) ti.dump(path) + + +def write_extra_files(tree_path, files, checksum_type='sha256', logger=None): + """ + Write the metadata for all extra files added to the compose. + + :param tree_path: + Root of the tree to write the ``extra_files.json`` metadata file for. + + :param files: + A list of files that should be included in the metadata file. These + should be paths that are relative to ``tree_path``. + + :return: + Path to the metadata file written. + """ + metadata_path = os.path.join(tree_path, 'extra_files.json') + if logger: + logger.info('Calculating content of {metadata}'.format(metadata=metadata_path)) + metadata = {'header': {'version': '1.0'}, 'data': []} + for f in files: + if logger: + logger.debug('Processing {file}'.format(file=f)) + path = os.path.join(tree_path, f) + checksum = pungi_util._doCheckSum(path, checksum_type, logger) + # _doCheckSum returns in the format : _or_ False for failure + if checksum is False: + err = 'Failed to calculate the checksum for {file}.'.format(file=path) + raise RuntimeError(err) + checksum = checksum.split(':')[1] + entry = { + 'file': f, + 'checksums': {checksum_type: checksum}, + 'size': os.path.getsize(path), + } + metadata['data'].append(entry) + + if logger: + logger.info('Writing {metadata}'.format(metadata=metadata_path)) + + with open(metadata_path, 'w') as fd: + json.dump(metadata, fd, sort_keys=True, indent=4, separators=(',', ': ')) + return metadata_path diff --git a/pungi/phases/extra_files.py b/pungi/phases/extra_files.py index 2ab5de79..339d9e69 100644 --- a/pungi/phases/extra_files.py +++ b/pungi/phases/extra_files.py @@ -21,6 +21,7 @@ import fnmatch from pungi.util import get_arch_variant_data, pkg_is_rpm, copy_all from pungi.arch import split_name_arch +from pungi import metadata from pungi.wrappers.scm import get_file_from_scm, get_dir_from_scm from pungi.phases.base import ConfigGuardedPhase @@ -45,7 +46,7 @@ class ExtraFilesPhase(ConfigGuardedPhase): % (arch, variant.uid)) -def copy_extra_files(compose, cfg, arch, variant, package_sets): +def copy_extra_files(compose, cfg, arch, variant, package_sets, checksum_type='sha256'): var_dict = { "arch": arch, "variant_id": variant.id, @@ -76,12 +77,12 @@ def copy_extra_files(compose, cfg, arch, variant, package_sets): scm_dict["repo"] = rpms getter = get_file_from_scm if 'file' in scm_dict else get_dir_from_scm - getter(scm_dict, - os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')), - logger=compose._logger) + target_path = os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')) + getter(scm_dict, target_path, logger=compose._logger) if os.listdir(extra_files_dir): - copy_all(extra_files_dir, os_tree) + files_copied = copy_all(extra_files_dir, os_tree) + metadata.write_extra_files(os_tree, files_copied, checksum_type, compose._logger) compose.log_info("[DONE ] %s" % msg) diff --git a/pungi/util.py b/pungi/util.py index b8009b6a..f76af908 100644 --- a/pungi/util.py +++ b/pungi/util.py @@ -506,7 +506,24 @@ def get_format_substs(compose, **kwargs): def copy_all(src, dest): - """This function is equivalent to running `cp src/* dest`.""" + """ + Copy all files and directories within ``src`` to the ``dest`` directory. + + This is equivalent to running ``cp -r src/* dest``. + + :param src: + Source directory to copy from. + + :param dest: + Destination directory to copy to. + + :return: + A list of relative paths to the files copied. + + Example: + >>> _copy_all('/tmp/src/', '/tmp/dest/') + ['file1', 'dir1/file2', 'dir1/subdir/file3'] + """ contents = os.listdir(src) if not contents: raise RuntimeError('Source directory %s is empty.' % src) @@ -519,6 +536,26 @@ def copy_all(src, dest): else: shutil.copy2(source, destination) + return recursive_file_list(src) + + +def recursive_file_list(directory): + """Return a list of files contained in ``directory``. + + The files are paths relative to ``directory`` + + :param directory: + Path to the directory to list. + + Example: + >>> recursive_file_list('/some/dir') + ['file1', 'subdir/file2'] + """ + file_list = [] + for root, dirs, files in os.walk(directory): + file_list += [os.path.relpath(os.path.join(root, f), directory) for f in files] + return file_list + def levenshtein(a, b): """Compute Levenshtein edit distance between two strings.""" diff --git a/pungi/wrappers/scm.py b/pungi/wrappers/scm.py index f601b369..3b8459e5 100644 --- a/pungi/wrappers/scm.py +++ b/pungi/wrappers/scm.py @@ -212,6 +212,36 @@ def _get_wrapper(scm_type, *args, **kwargs): def get_file_from_scm(scm_dict, target_path, logger=None): + """ + Copy one or more files from source control to a target path. A list of files + created in ``target_path`` is returned. + + :param scm_dict: + A dictionary describing the source control repository; this can + optionally be a path to a directory on the local filesystem or reference + an RPM. Supported keys for the dictionary are ``scm``, ``repo``, + ``file``, and ``branch``. ``scm`` is the type of version control system + used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository + (or, if 'rpm' is the ``scm``, the package name), ``file`` is either a + path or list of paths to copy, and ``branch`` is the branch to check + out, if any. + + :param target_path: + The destination path for the files being copied. + + :param logger: + The logger to use for any logging performed. + + Example: + >>> scm_dict = { + >>> 'scm': 'git', + >>> 'repo': 'https://pagure.io/pungi.git', + >>> 'file': ['share/variants.dtd'], + >>> } + >>> target_path = '/tmp/path/' + >>> get_file_from_scm(scm_dict, target_path) + ['/tmp/path/share/variants.dtd'] + """ if isinstance(scm_dict, str): scm_type = "file" scm_repo = None @@ -225,14 +255,45 @@ def get_file_from_scm(scm_dict, target_path, logger=None): scm = _get_wrapper(scm_type, logger=logger) + files_copied = [] for i in force_list(scm_file): tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") scm.export_file(scm_repo, i, scm_branch=scm_branch, target_dir=tmp_dir) - copy_all(tmp_dir, target_path) + files_copied += copy_all(tmp_dir, target_path) shutil.rmtree(tmp_dir) + return files_copied def get_dir_from_scm(scm_dict, target_path, logger=None): + """ + Copy a directory from source control to a target path. A list of files + created in ``target_path`` is returned. + + :param scm_dict: + A dictionary describing the source control repository; this can + optionally be a path to a directory on the local filesystem or reference + an RPM. Supported keys for the dictionary are ``scm``, ``repo``, + ``dir``, and ``branch``. ``scm`` is the type of version control system + used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository + (or, if 'rpm' is the ``scm``, the package name), ``dir`` is the + directory to copy, and ``branch`` is the branch to check out, if any. + + :param target_path: + The destination path for the directory being copied. + + :param logger: + The logger to use for any logging performed. + + Example: + >>> scm_dict = { + >>> 'scm': 'git', + >>> 'repo': 'https://pagure.io/pungi.git', + >>> 'dir': 'share, + >>> } + >>> target_path = '/tmp/path/' + >>> get_dir_from_scm(scm_dict, target_path) + ['/tmp/path/share/variants.dtd', '/tmp/path/share/rawhide-fedora.ks', ...] + """ if isinstance(scm_dict, str): scm_type = "file" scm_repo = None @@ -248,5 +309,6 @@ def get_dir_from_scm(scm_dict, target_path, logger=None): tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") scm.export_dir(scm_repo, scm_dir, scm_branch=scm_branch, target_dir=tmp_dir) - copy_all(tmp_dir, target_path) + files_copied = copy_all(tmp_dir, target_path) shutil.rmtree(tmp_dir) + return files_copied diff --git a/tests/test_extra_files_phase.py b/tests/test_extra_files_phase.py index e29c57e8..94e443c8 100755 --- a/tests/test_extra_files_phase.py +++ b/tests/test_extra_files_phase.py @@ -81,7 +81,6 @@ class TestCopyFiles(helpers.PungiTestCase): compose = helpers.DummyCompose(self.topdir, {}) cfg = {'scm': 'file', 'dir': os.path.join(self.topdir, 'src'), 'repo': None, 'target': 'subdir'} - extra_files.copy_extra_files(compose, [cfg], 'x86_64', compose.variants['Server'], mock.Mock()) @@ -147,6 +146,7 @@ class TestCopyFiles(helpers.PungiTestCase): def fake_get_file(self, scm_dict, dest, logger): self.scm_dict = scm_dict helpers.touch(os.path.join(dest, scm_dict['file'])) + return [scm_dict['file']] if __name__ == "__main__": diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 8ae07d67..2b4be550 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- - +import json import mock import unittest import os @@ -159,5 +159,58 @@ class MediaRepoTestCase(helpers.PungiTestCase): self.assertFalse(os.path.isfile(self.path)) +class TestWriteExtraFiles(helpers.PungiTestCase): + + def setUp(self): + super(TestWriteExtraFiles, self).setUp() + self.compose = helpers.DummyCompose(self.topdir, {}) + + def test_write_extra_files(self): + """Assert metadata is written to the proper location with valid data""" + mock_logger = mock.Mock() + files = ['file1', 'file2', 'subdir/file3'] + expected_metadata = { + u'header': {u'version': u'1.0'}, + u'data': [ + { + u'file': u'file1', + u'checksums': {u'sha256': u'ecdc5536f73bdae8816f0ea40726ef5e9b810d914493075903bb90623d97b1d8'}, + u'size': 6, + }, + { + u'file': u'file2', + u'checksums': {u'sha256': u'67ee5478eaadb034ba59944eb977797b49ca6aa8d3574587f36ebcbeeb65f70e'}, + u'size': 6, + }, + { + u'file': u'subdir/file3', + u'checksums': {u'sha256': u'52f9f0e467e33da811330cad085fdb4eaa7abcb9ebfe6001e0f5910da678be51'}, + u'size': 13, + }, + ] + } + tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os') + for f in files: + helpers.touch(os.path.join(tree_dir, f), f + '\n') + + metadata_file = metadata.write_extra_files(tree_dir, files, logger=mock_logger) + with open(metadata_file) as metadata_fd: + actual_metadata = json.load(metadata_fd) + + self.assertEqual(expected_metadata['header'], actual_metadata['header']) + self.assertEqual(expected_metadata['data'], actual_metadata['data']) + + def test_write_extra_files_missing_file(self): + """Assert metadata is written to the proper location with valid data""" + mock_logger = mock.Mock() + files = ['file1', 'file2', 'subdir/file3'] + tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os') + for f in files: + helpers.touch(os.path.join(tree_dir, f), f + '\n') + files.append('missing_file') + + self.assertRaises(RuntimeError, metadata.write_extra_files, tree_dir, files, 'sha256', mock_logger) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_util.py b/tests/test_util.py index 77ca063b..8e21cbc0 100755 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -379,5 +379,32 @@ class TestLevenshtein(unittest.TestCase): self.assertEqual(util.levenshtein('kitten', 'sitting'), 3) +class TestRecursiveFileList(unittest.TestCase): + + def setUp(self): + self.tmp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + + def test_flat_file_list(self): + """Build a directory containing files and assert they are listed.""" + expected_files = sorted(['file1', 'file2', 'file3']) + for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]: + touch(expected_file) + + actual_files = sorted(util.recursive_file_list(self.tmp_dir)) + self.assertEqual(expected_files, actual_files) + + def test_nested_file_list(self): + """Build a directory containing files and assert they are listed.""" + expected_files = sorted(['file1', 'subdir/file2', 'sub/subdir/file3']) + for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]: + touch(expected_file) + + actual_files = sorted(util.recursive_file_list(self.tmp_dir)) + self.assertEqual(expected_files, actual_files) + + if __name__ == "__main__": unittest.main()