scm-wrapper: Refactor getting files from Git

Before this patch, there were two code paths: either getting the only
the wanted content by calling git-archive, or cloning the repository and
copying the files.

Both these approaches have the downside of not allowing retriving
content from a specific git commit.

The workaround is to create a new empty repo (in the location to which
we cloned previously), fetching the specific commit to there and then
checking it out.

This supports any commit and works identically for any protocol. The
downside is that all files in that commit will be downloaded. It should
be no worse than the git-clone path, but can result in bigger transfers
than git-archive.

Unfortunately this is only supported with git 2.5+. On older version
fetch will fail with no error message (tested with 1.8.3). This can be
used to fall back to full clone. This fallback is clearly suboptimal in
terms of data transfer, but it should work reliably.

Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
This commit is contained in:
Lubomír Sedlář 2019-02-25 14:07:58 +01:00
parent ef6bb20a0e
commit ff4e6d4782
2 changed files with 110 additions and 164 deletions

View File

@ -100,26 +100,62 @@ class CvsWrapper(ScmBase):
shutil.copy2(os.path.join(tmp_dir, scm_file), target_path)
class FetchUnsupportedError(Exception):
"""Raised when git-fetch does not support getting particular commit by hash."""
pass
class GitWrapper(ScmBase):
@retry(interval=60, timeout=300, wait_on=RuntimeError)
def _try_fetch(self, repo, branch, destdir):
"""Attempt to run git-fetch to get only the relevant commit. This
requires git >= 2.5. On unsupported versions it does not return any
error message (at least with 1.8.3). We can check that and fall back to
full clone then.
"""
try:
return run(["git", "fetch", "--depth=1", repo, branch], workdir=destdir)
except RuntimeError as exc:
if exc.output:
# We have some error message, so re-raise the error to retry.
raise
raise FetchUnsupportedError()
def _clone(self, repo, branch, destdir):
"""Get a single commit from a repository.
We can't use git-archive as that does not support arbitrary hash as
commit, and git-clone can only get a branch too. Thus the workaround is
to create a new local repo, fetch the commit from remote and then check
it out. If that fails, we get a full clone.
Finally the post-processing command is ran.
"""
if "://" not in repo:
repo = "file://%s" % repo
run(["git", "init"], workdir=destdir)
try:
self._try_fetch(repo, branch, destdir)
run(["git", "checkout", "FETCH_HEAD"], workdir=destdir)
except FetchUnsupportedError:
# Fetch failed, to do a full clone we add a remote to our empty
# repo, get its content and check out the reference we want.
run(["git", "remote", "add", "origin", repo], workdir=destdir)
self.retry_run(["git", "remote", "update", "origin"], workdir=destdir)
run(["git", "checkout", branch], workdir=destdir)
self.run_process_command(destdir)
def export_dir(self, scm_root, scm_dir, target_dir, scm_branch=None):
scm_dir = scm_dir.lstrip("/")
scm_branch = scm_branch or "master"
with temp_dir() as tmp_dir:
if "://" not in scm_root:
scm_root = "file://%s" % scm_root
self.log_debug("Exporting directory %s from git %s (branch %s)..."
% (scm_dir, scm_root, scm_branch))
cmd = ("/usr/bin/git archive --remote=%s %s %s | tar xf -"
% (shlex_quote(scm_root), shlex_quote(scm_branch), shlex_quote(scm_dir)))
# git archive is not supported by http/https
# or by smart http https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP
if scm_root.startswith("http") or self.command:
cmd = ("/usr/bin/git clone --depth 1 --branch=%s %s %s"
% (shlex_quote(scm_branch), shlex_quote(scm_root), shlex_quote(tmp_dir)))
self.retry_run(cmd, workdir=tmp_dir, show_cmd=True)
self.run_process_command(tmp_dir)
self._clone(scm_root, scm_branch, tmp_dir)
copy_all(os.path.join(tmp_dir, scm_dir), target_dir)
@ -130,20 +166,10 @@ class GitWrapper(ScmBase):
with temp_dir() as tmp_dir:
target_path = os.path.join(target_dir, os.path.basename(scm_file))
if "://" not in scm_root:
scm_root = "file://%s" % scm_root
self.log_debug("Exporting file %s from git %s (branch %s)..."
% (scm_file, scm_root, scm_branch))
cmd = ("/usr/bin/git archive --remote=%s %s %s | tar xf -"
% (shlex_quote(scm_root), shlex_quote(scm_branch), shlex_quote(scm_file)))
# git archive is not supported by http/https
# or by smart http https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP
if scm_root.startswith("http") or self.command:
cmd = ("/usr/bin/git clone --depth 1 --branch=%s %s %s"
% (shlex_quote(scm_branch), shlex_quote(scm_root), shlex_quote(tmp_dir)))
self.retry_run(cmd, workdir=tmp_dir, show_cmd=True)
self.run_process_command(tmp_dir)
self._clone(scm_root, scm_branch, tmp_dir)
makedirs(target_dir)
shutil.copy2(os.path.join(tmp_dir, scm_file), target_path)

View File

@ -104,15 +104,23 @@ class FileSCMTestCase(SCMBaseTest):
class GitSCMTestCase(SCMBaseTest):
def assertCalls(self, mock_run, url, branch, command=None):
command = [command] if command else []
self.assertEqual(
[call[0][0] for call in mock_run.call_args_list],
[
["git", "init"],
["git", "fetch", "--depth=1", url, branch],
["git", "checkout", "FETCH_HEAD"],
] + command,
)
@mock.patch('pungi.wrappers.scm.run')
def test_get_file(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
fname = cmd.split('|')[0].strip().split(' ')[-1]
touch(os.path.join(workdir, fname))
commands.append(cmd)
touch(os.path.join(workdir, 'some_file.txt'))
touch(os.path.join(workdir, 'other_file.txt'))
run.side_effect = process
@ -121,19 +129,43 @@ class GitSCMTestCase(SCMBaseTest):
'file': 'some_file.txt'},
self.destdir)
self.assertStructure(retval, ['some_file.txt'])
self.assertEqual(
commands,
['/usr/bin/git archive --remote=git://example.com/git/repo.git master some_file.txt | tar xf -'])
self.assertCalls(run, "git://example.com/git/repo.git", "master")
@mock.patch('pungi.wrappers.scm.run')
def test_get_file_generated_by_command_via_git(self, run):
commands = []
def test_get_file_fetch_fails(self, run):
url = "git://example.com/git/repo.git"
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'some_file.txt'))
commands.append(cmd)
if "fetch" in cmd:
exc = RuntimeError()
exc.output = ""
raise exc
touch(os.path.join(workdir, 'some_file.txt'))
touch(os.path.join(workdir, 'other_file.txt'))
run.side_effect = process
retval = scm.get_file_from_scm(
{"scm": "git", "repo": url, "file": "some_file.txt"}, self.destdir
)
self.assertStructure(retval, ['some_file.txt'])
self.assertEqual(
[call[0][0] for call in run.call_args_list],
[
["git", "init"],
["git", "fetch", "--depth=1", url, "master"],
["git", "remote", "add", "origin", url],
["git", "remote", "update", "origin"],
["git", "checkout", "master"],
],
)
@mock.patch('pungi.wrappers.scm.run')
def test_get_file_generated_by_command(self, run):
def process(cmd, workdir=None, **kwargs):
if cmd[0] == "git":
touch(os.path.join(workdir, 'some_file.txt'))
return 0, ''
run.side_effect = process
@ -144,21 +176,16 @@ class GitSCMTestCase(SCMBaseTest):
'command': 'make'},
self.destdir)
self.assertStructure(retval, ['some_file.txt'])
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master git://example.com/git/repo.git /tmp/.+')
self.assertEqual(commands[1:], ['make'])
self.assertCalls(run, "git://example.com/git/repo.git", "master", "make")
@mock.patch('pungi.wrappers.scm.run')
def test_get_file_and_fail_to_generate(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'some_file.txt'))
commands.append(cmd)
return len(commands), 'output'
if cmd[0] == "git":
touch(os.path.join(workdir, 'some_file.txt'))
return 0, "output"
return 1, "output"
run.side_effect = process
@ -169,63 +196,14 @@ class GitSCMTestCase(SCMBaseTest):
'command': 'make'},
self.destdir)
self.assertEqual(str(ctx.exception), "'make' failed with exit code 2")
@mock.patch('pungi.wrappers.scm.run')
def test_get_file_generated_by_command_via_https(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'some_file.txt'))
commands.append(cmd)
return 0, ''
run.side_effect = process
retval = scm.get_file_from_scm({'scm': 'git',
'repo': 'https://example.com/git/repo.git',
'file': 'some_file.txt',
'command': 'make'},
self.destdir)
self.assertStructure(retval, ['some_file.txt'])
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+')
self.assertEqual(commands[1:], ['make'])
@mock.patch('pungi.wrappers.scm.run')
def test_get_file_via_https(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'some_file.txt'))
touch(os.path.join(checkout, 'other_file.txt'))
commands.append(cmd)
run.side_effect = process
retval = scm.get_file_from_scm({'scm': 'git',
'repo': 'https://example.com/git/repo.git',
'file': 'some_file.txt'},
self.destdir)
self.assertStructure(retval, ['some_file.txt'])
self.assertEqual(1, len(commands))
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+')
self.assertEqual(str(ctx.exception), "'make' failed with exit code 1")
@mock.patch('pungi.wrappers.scm.run')
def test_get_dir(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
fname = cmd.split('|')[0].strip().split(' ')[-1]
touch(os.path.join(workdir, fname, 'first'))
touch(os.path.join(workdir, fname, 'second'))
commands.append(cmd)
touch(os.path.join(workdir, "subdir", 'first'))
touch(os.path.join(workdir, "subdir", 'second'))
run.side_effect = process
@ -234,21 +212,15 @@ class GitSCMTestCase(SCMBaseTest):
'dir': 'subdir'},
self.destdir)
self.assertStructure(retval, ['first', 'second'])
self.assertEqual(
commands,
['/usr/bin/git archive --remote=git://example.com/git/repo.git master subdir | tar xf -'])
self.assertCalls(run, "git://example.com/git/repo.git", "master")
@mock.patch('pungi.wrappers.scm.run')
def test_get_dir_via_git_and_generate(self, run):
commands = []
def test_get_dir_and_generate(self, run):
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'subdir', 'first'))
touch(os.path.join(checkout, 'subdir', 'second'))
commands.append(cmd)
if cmd[0] == "git":
touch(os.path.join(workdir, 'subdir', 'first'))
touch(os.path.join(workdir, 'subdir', 'second'))
return 0, ''
run.side_effect = process
@ -259,59 +231,7 @@ class GitSCMTestCase(SCMBaseTest):
'command': 'make'},
self.destdir)
self.assertStructure(retval, ['first', 'second'])
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master git://example.com/git/repo.git /tmp/.+')
self.assertEqual(commands[1:], ['make'])
@mock.patch('pungi.wrappers.scm.run')
def test_get_dir_via_https(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'subdir', 'first'))
touch(os.path.join(checkout, 'subdir', 'second'))
commands.append(cmd)
run.side_effect = process
retval = scm.get_dir_from_scm({'scm': 'git',
'repo': 'https://example.com/git/repo.git',
'dir': 'subdir'},
self.destdir)
self.assertStructure(retval, ['first', 'second'])
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+')
@mock.patch('pungi.wrappers.scm.run')
def test_get_dir_via_https_and_generate(self, run):
commands = []
def process(cmd, workdir=None, **kwargs):
if cmd.startswith('/usr/bin/git'):
checkout = cmd.split(' ')[-1]
touch(os.path.join(checkout, 'subdir', 'first'))
touch(os.path.join(checkout, 'subdir', 'second'))
commands.append(cmd)
return 0, ''
run.side_effect = process
retval = scm.get_dir_from_scm({'scm': 'git',
'repo': 'https://example.com/git/repo.git',
'dir': 'subdir',
'command': 'make'},
self.destdir)
self.assertStructure(retval, ['first', 'second'])
self.assertRegexpMatches(
commands[0],
r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+')
self.assertEqual(commands[1:], ['make'])
self.assertCalls(run, "git://example.com/git/repo.git", "master", "make")
class RpmSCMTestCase(SCMBaseTest):