extra-files: Write a metadata file enumerating extra files

Introduces a new metadata file to track arbitrary files added during the
extra-files phase. This file is placed in the root of each tree and is
called ``extra_files.json``. It is a JSON file containing a single
object, which contains a "header" key with an object describing the
metadata, and a "data" key, which is an array of objects, where each
object represents a file. Each object contains the "file", "checksums",
and "size" keys. "file" is the relative path from the tree root to the
extra file. "checksums" is an object containing one or more checksums,
where the key is the digest type and the value of that key is the hex
digest. Finally, the size is the size of the file in bytes.

For example:
{
  "header": {"version": "1.0"},
  "data": [
    {
      "file": "GPL",
      "checksums": {
        "sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643"
      },
      "size": 18092
    },
    {
      "file": "release-notes/notes.html",
      "checksums": {
        "sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb"
      },
      "size": 1120
    }
  ]
}

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Fixes: #295
This commit is contained in:
Jeremy Cline 2016-05-31 09:40:20 -04:00 committed by Lubomír Sedlář
parent 6aeab9ee9d
commit ee1ee0467b
9 changed files with 265 additions and 11 deletions

View File

@ -13,3 +13,4 @@ Tom Callaway <tcallawa at redhat dot com>
Joel Andres Granados <jgranado at redhat dot com> Joel Andres Granados <jgranado at redhat dot com>
<proski at fedoraproject dot org> <proski at fedoraproject dot org>
Mark McLoughlin <markmc at redhat dot com> Mark McLoughlin <markmc at redhat dot com>
Jeremy Cline <jcline at redhat dot com>

View File

@ -703,6 +703,34 @@ Example
] ]
Extra Files Metadata
--------------------
If extra files are specified a metadata file, ``extra_files.json``, is placed
in the os/ directory and media. This metadata file is in the format:
::
{
"header": {"version": "1.0"},
"data": [
{
"file": "GPL",
"checksums": {
"sha256": "8177f97513213526df2cf6184d8ff986c675afb514d4e68a404010521b880643"
},
"size": 18092
},
{
"file": "release-notes/notes.html",
"checksums": {
"sha256": "82b1ba8db522aadf101dca6404235fba179e559b95ea24ff39ee1e5d9a53bdcb"
},
"size": 1120
}
]
}
Productimg Settings Productimg Settings
=================== ===================
Product images are placed on installation media and provide additional branding Product images are placed on installation media and provide additional branding
@ -1240,7 +1268,7 @@ Translate Paths Settings
This feature becomes useful when you need to transform compose location This feature becomes useful when you need to transform compose location
into e.g. an HTTP repo which can be passed to ``koji image-build``. into e.g. an HTTP repo which can be passed to ``koji image-build``.
The ``path`` part is normalized via ``os.path.normpath()``. The ``path`` part is normalized via ``os.path.normpath()``.
Example config Example config
-------------- --------------

View File

@ -17,6 +17,7 @@
import os import os
import time import time
import json
import productmd.composeinfo import productmd.composeinfo
import productmd.treeinfo import productmd.treeinfo
@ -25,6 +26,7 @@ from kobo.shortcuts import relative_path
from pungi.compose_metadata.discinfo import write_discinfo as create_discinfo from pungi.compose_metadata.discinfo import write_discinfo as create_discinfo
from pungi.compose_metadata.discinfo import write_media_repo as create_media_repo from pungi.compose_metadata.discinfo import write_media_repo as create_media_repo
from pungi import util as pungi_util
def get_description(compose, variant, arch): def get_description(compose, variant, arch):
@ -317,3 +319,46 @@ def write_tree_info(compose, arch, variant, timestamp=None):
path = os.path.join(compose.paths.compose.os_tree(arch=arch, variant=variant), ".treeinfo") path = os.path.join(compose.paths.compose.os_tree(arch=arch, variant=variant), ".treeinfo")
compose.log_info("Writing treeinfo: %s" % path) compose.log_info("Writing treeinfo: %s" % path)
ti.dump(path) ti.dump(path)
def write_extra_files(tree_path, files, checksum_type='sha256', logger=None):
    """
    Write the metadata for all extra files added to the compose.

    The metadata is written as ``extra_files.json`` in the root of
    ``tree_path`` and records, for each file, its tree-relative path, one
    checksum, and its size in bytes.

    :param tree_path:
        Root of the tree to write the ``extra_files.json`` metadata file for.
    :param files:
        A list of files that should be included in the metadata file. These
        should be paths that are relative to ``tree_path``.
    :param checksum_type:
        Digest type to record for each file. Defaults to ``'sha256'``.
    :param logger:
        Optional logger used to report progress; pass ``None`` to be silent.
    :return:
        Path to the metadata file written.
    :raises RuntimeError:
        If the checksum cannot be calculated for one of the files.
    """
    metadata_path = os.path.join(tree_path, 'extra_files.json')
    if logger:
        logger.info('Calculating content of {metadata}'.format(metadata=metadata_path))
    metadata = {'header': {'version': '1.0'}, 'data': []}
    for f in files:
        if logger:
            logger.debug('Processing {file}'.format(file=f))
        path = os.path.join(tree_path, f)
        checksum = pungi_util._doCheckSum(path, checksum_type, logger)
        # _doCheckSum returns in the format <type>:<digest> _or_ False for failure
        if checksum is False:
            err = 'Failed to calculate the checksum for {file}.'.format(file=path)
            raise RuntimeError(err)
        # Split on the first colon only, so a digest that happens to contain
        # ':' is not truncated.
        checksum = checksum.split(':', 1)[1]
        entry = {
            'file': f,
            'checksums': {checksum_type: checksum},
            'size': os.path.getsize(path),
        }
        metadata['data'].append(entry)
    if logger:
        logger.info('Writing {metadata}'.format(metadata=metadata_path))
    with open(metadata_path, 'w') as fd:
        json.dump(metadata, fd, sort_keys=True, indent=4, separators=(',', ': '))
    return metadata_path

View File

@ -21,6 +21,7 @@ import fnmatch
from pungi.util import get_arch_variant_data, pkg_is_rpm, copy_all from pungi.util import get_arch_variant_data, pkg_is_rpm, copy_all
from pungi.arch import split_name_arch from pungi.arch import split_name_arch
from pungi import metadata
from pungi.wrappers.scm import get_file_from_scm, get_dir_from_scm from pungi.wrappers.scm import get_file_from_scm, get_dir_from_scm
from pungi.phases.base import ConfigGuardedPhase from pungi.phases.base import ConfigGuardedPhase
@ -45,7 +46,7 @@ class ExtraFilesPhase(ConfigGuardedPhase):
% (arch, variant.uid)) % (arch, variant.uid))
def copy_extra_files(compose, cfg, arch, variant, package_sets): def copy_extra_files(compose, cfg, arch, variant, package_sets, checksum_type='sha256'):
var_dict = { var_dict = {
"arch": arch, "arch": arch,
"variant_id": variant.id, "variant_id": variant.id,
@ -76,12 +77,12 @@ def copy_extra_files(compose, cfg, arch, variant, package_sets):
scm_dict["repo"] = rpms scm_dict["repo"] = rpms
getter = get_file_from_scm if 'file' in scm_dict else get_dir_from_scm getter = get_file_from_scm if 'file' in scm_dict else get_dir_from_scm
getter(scm_dict, target_path = os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/'))
os.path.join(extra_files_dir, scm_dict.get('target', '').lstrip('/')), getter(scm_dict, target_path, logger=compose._logger)
logger=compose._logger)
if os.listdir(extra_files_dir): if os.listdir(extra_files_dir):
copy_all(extra_files_dir, os_tree) files_copied = copy_all(extra_files_dir, os_tree)
metadata.write_extra_files(os_tree, files_copied, checksum_type, compose._logger)
compose.log_info("[DONE ] %s" % msg) compose.log_info("[DONE ] %s" % msg)

View File

@ -506,7 +506,24 @@ def get_format_substs(compose, **kwargs):
def copy_all(src, dest): def copy_all(src, dest):
"""This function is equivalent to running `cp src/* dest`.""" """
Copy all files and directories within ``src`` to the ``dest`` directory.
This is equivalent to running ``cp -r src/* dest``.
:param src:
Source directory to copy from.
:param dest:
Destination directory to copy to.
:return:
A list of relative paths to the files copied.
Example:
>>> _copy_all('/tmp/src/', '/tmp/dest/')
['file1', 'dir1/file2', 'dir1/subdir/file3']
"""
contents = os.listdir(src) contents = os.listdir(src)
if not contents: if not contents:
raise RuntimeError('Source directory %s is empty.' % src) raise RuntimeError('Source directory %s is empty.' % src)
@ -519,6 +536,26 @@ def copy_all(src, dest):
else: else:
shutil.copy2(source, destination) shutil.copy2(source, destination)
return recursive_file_list(src)
def recursive_file_list(directory):
    """Recursively collect the files underneath ``directory``.

    Each entry in the returned list is a path relative to ``directory``.

    :param directory:
        Path to the directory to list.

    Example:
        >>> recursive_file_list('/some/dir')
        ['file1', 'subdir/file2']
    """
    relative_paths = []
    for dirpath, _subdirs, filenames in os.walk(directory):
        for name in filenames:
            full_path = os.path.join(dirpath, name)
            relative_paths.append(os.path.relpath(full_path, directory))
    return relative_paths
def levenshtein(a, b): def levenshtein(a, b):
"""Compute Levenshtein edit distance between two strings.""" """Compute Levenshtein edit distance between two strings."""

View File

@ -212,6 +212,36 @@ def _get_wrapper(scm_type, *args, **kwargs):
def get_file_from_scm(scm_dict, target_path, logger=None): def get_file_from_scm(scm_dict, target_path, logger=None):
"""
Copy one or more files from source control to a target path. A list of files
created in ``target_path`` is returned.
:param scm_dict:
A dictionary describing the source control repository; this can
optionally be a path to a directory on the local filesystem or reference
an RPM. Supported keys for the dictionary are ``scm``, ``repo``,
``file``, and ``branch``. ``scm`` is the type of version control system
used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository
(or, if 'rpm' is the ``scm``, the package name), ``file`` is either a
path or list of paths to copy, and ``branch`` is the branch to check
out, if any.
:param target_path:
The destination path for the files being copied.
:param logger:
The logger to use for any logging performed.
Example:
>>> scm_dict = {
>>> 'scm': 'git',
>>> 'repo': 'https://pagure.io/pungi.git',
>>> 'file': ['share/variants.dtd'],
>>> }
>>> target_path = '/tmp/path/'
>>> get_file_from_scm(scm_dict, target_path)
['/tmp/path/share/variants.dtd']
"""
if isinstance(scm_dict, str): if isinstance(scm_dict, str):
scm_type = "file" scm_type = "file"
scm_repo = None scm_repo = None
@ -225,14 +255,45 @@ def get_file_from_scm(scm_dict, target_path, logger=None):
scm = _get_wrapper(scm_type, logger=logger) scm = _get_wrapper(scm_type, logger=logger)
files_copied = []
for i in force_list(scm_file): for i in force_list(scm_file):
tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_")
scm.export_file(scm_repo, i, scm_branch=scm_branch, target_dir=tmp_dir) scm.export_file(scm_repo, i, scm_branch=scm_branch, target_dir=tmp_dir)
copy_all(tmp_dir, target_path) files_copied += copy_all(tmp_dir, target_path)
shutil.rmtree(tmp_dir) shutil.rmtree(tmp_dir)
return files_copied
def get_dir_from_scm(scm_dict, target_path, logger=None): def get_dir_from_scm(scm_dict, target_path, logger=None):
"""
Copy a directory from source control to a target path. A list of files
created in ``target_path`` is returned.
:param scm_dict:
A dictionary describing the source control repository; this can
optionally be a path to a directory on the local filesystem or reference
an RPM. Supported keys for the dictionary are ``scm``, ``repo``,
``dir``, and ``branch``. ``scm`` is the type of version control system
used ('git', 'cvs', 'rpm', etc.), ``repo`` is the URL of the repository
(or, if 'rpm' is the ``scm``, the package name), ``dir`` is the
directory to copy, and ``branch`` is the branch to check out, if any.
:param target_path:
The destination path for the directory being copied.
:param logger:
The logger to use for any logging performed.
Example:
>>> scm_dict = {
>>> 'scm': 'git',
>>> 'repo': 'https://pagure.io/pungi.git',
>>> 'dir': 'share',
>>> }
>>> target_path = '/tmp/path/'
>>> get_dir_from_scm(scm_dict, target_path)
['/tmp/path/share/variants.dtd', '/tmp/path/share/rawhide-fedora.ks', ...]
"""
if isinstance(scm_dict, str): if isinstance(scm_dict, str):
scm_type = "file" scm_type = "file"
scm_repo = None scm_repo = None
@ -248,5 +309,6 @@ def get_dir_from_scm(scm_dict, target_path, logger=None):
tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_") tmp_dir = tempfile.mkdtemp(prefix="scm_checkout_")
scm.export_dir(scm_repo, scm_dir, scm_branch=scm_branch, target_dir=tmp_dir) scm.export_dir(scm_repo, scm_dir, scm_branch=scm_branch, target_dir=tmp_dir)
copy_all(tmp_dir, target_path) files_copied = copy_all(tmp_dir, target_path)
shutil.rmtree(tmp_dir) shutil.rmtree(tmp_dir)
return files_copied

View File

@ -81,7 +81,6 @@ class TestCopyFiles(helpers.PungiTestCase):
compose = helpers.DummyCompose(self.topdir, {}) compose = helpers.DummyCompose(self.topdir, {})
cfg = {'scm': 'file', 'dir': os.path.join(self.topdir, 'src'), cfg = {'scm': 'file', 'dir': os.path.join(self.topdir, 'src'),
'repo': None, 'target': 'subdir'} 'repo': None, 'target': 'subdir'}
extra_files.copy_extra_files(compose, [cfg], 'x86_64', extra_files.copy_extra_files(compose, [cfg], 'x86_64',
compose.variants['Server'], mock.Mock()) compose.variants['Server'], mock.Mock())
@ -147,6 +146,7 @@ class TestCopyFiles(helpers.PungiTestCase):
def fake_get_file(self, scm_dict, dest, logger): def fake_get_file(self, scm_dict, dest, logger):
self.scm_dict = scm_dict self.scm_dict = scm_dict
helpers.touch(os.path.join(dest, scm_dict['file'])) helpers.touch(os.path.join(dest, scm_dict['file']))
return [scm_dict['file']]
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json
import mock import mock
import unittest import unittest
import os import os
@ -159,5 +159,58 @@ class MediaRepoTestCase(helpers.PungiTestCase):
self.assertFalse(os.path.isfile(self.path)) self.assertFalse(os.path.isfile(self.path))
class TestWriteExtraFiles(helpers.PungiTestCase):
    """Tests for ``metadata.write_extra_files``."""

    def setUp(self):
        super(TestWriteExtraFiles, self).setUp()
        self.compose = helpers.DummyCompose(self.topdir, {})

    def test_write_extra_files(self):
        """Assert metadata is written to the proper location with valid data"""
        mock_logger = mock.Mock()
        files = ['file1', 'file2', 'subdir/file3']
        expected_metadata = {
            u'header': {u'version': u'1.0'},
            u'data': [
                {
                    u'file': u'file1',
                    u'checksums': {u'sha256': u'ecdc5536f73bdae8816f0ea40726ef5e9b810d914493075903bb90623d97b1d8'},
                    u'size': 6,
                },
                {
                    u'file': u'file2',
                    u'checksums': {u'sha256': u'67ee5478eaadb034ba59944eb977797b49ca6aa8d3574587f36ebcbeeb65f70e'},
                    u'size': 6,
                },
                {
                    u'file': u'subdir/file3',
                    u'checksums': {u'sha256': u'52f9f0e467e33da811330cad085fdb4eaa7abcb9ebfe6001e0f5910da678be51'},
                    u'size': 13,
                },
            ]
        }
        tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os')
        # Each file's content is its own name plus a newline, which gives the
        # known sizes and sha256 digests asserted above.
        for f in files:
            helpers.touch(os.path.join(tree_dir, f), f + '\n')

        metadata_file = metadata.write_extra_files(tree_dir, files, logger=mock_logger)
        with open(metadata_file) as metadata_fd:
            actual_metadata = json.load(metadata_fd)

        self.assertEqual(expected_metadata['header'], actual_metadata['header'])
        self.assertEqual(expected_metadata['data'], actual_metadata['data'])

    def test_write_extra_files_missing_file(self):
        """Assert a RuntimeError is raised when a listed file does not exist."""
        mock_logger = mock.Mock()
        files = ['file1', 'file2', 'subdir/file3']
        tree_dir = os.path.join(self.topdir, 'compose', 'Server', 'x86_64', 'os')
        for f in files:
            helpers.touch(os.path.join(tree_dir, f), f + '\n')
        # 'missing_file' is never created, so checksumming it must fail.
        files.append('missing_file')

        self.assertRaises(RuntimeError, metadata.write_extra_files, tree_dir, files,
                          'sha256', mock_logger)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -379,5 +379,32 @@ class TestLevenshtein(unittest.TestCase):
self.assertEqual(util.levenshtein('kitten', 'sitting'), 3) self.assertEqual(util.levenshtein('kitten', 'sitting'), 3)
class TestRecursiveFileList(unittest.TestCase):
    """Tests for ``util.recursive_file_list``."""

    def setUp(self):
        self.tmp_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)

    def test_flat_file_list(self):
        """Build a directory containing only top-level files and assert they are listed."""
        expected_files = sorted(['file1', 'file2', 'file3'])
        for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]:
            touch(expected_file)

        actual_files = sorted(util.recursive_file_list(self.tmp_dir))
        self.assertEqual(expected_files, actual_files)

    def test_nested_file_list(self):
        """Build a directory tree with nested subdirectories and assert all files are listed."""
        expected_files = sorted(['file1', 'subdir/file2', 'sub/subdir/file3'])
        for expected_file in [os.path.join(self.tmp_dir, f) for f in expected_files]:
            touch(expected_file)

        actual_files = sorted(util.recursive_file_list(self.tmp_dir))
        self.assertEqual(expected_files, actual_files)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()