From ff4e6d4782aab79d474e1037de98b0cf8dae6adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubom=C3=ADr=20Sedl=C3=A1=C5=99?= Date: Mon, 25 Feb 2019 14:07:58 +0100 Subject: [PATCH] scm-wrapper: Refactor getting files from Git MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this patch, there were two code paths: either getting the only the wanted content by calling git-archive, or cloning the repository and copying the files. Both these approaches have the downside of not allowing retriving content from a specific git commit. The workaround is to create a new empty repo (in the location to which we cloned previously), fetching the specific commit to there and then checking it out. This supports any commit and works identically for any protocol. The downside is that all files in that commit will be downloaded. It should be no worse than the git-clone path, but can result in bigger transfers than git-archive. Unfortunately this is only supported with git 2.5+. On older version fetch will fail with no error message (tested with 1.8.3). This can be used to fall back to full clone. This fallback is clearly suboptimal in terms of data transfer, but it should work reliably. Signed-off-by: Lubomír Sedlář --- pungi/wrappers/scm.py | 74 +++++++++++----- tests/test_scm.py | 200 +++++++++++++----------------------------- 2 files changed, 110 insertions(+), 164 deletions(-) diff --git a/pungi/wrappers/scm.py b/pungi/wrappers/scm.py index 966dbc51..a9f583e3 100644 --- a/pungi/wrappers/scm.py +++ b/pungi/wrappers/scm.py @@ -100,26 +100,62 @@ class CvsWrapper(ScmBase): shutil.copy2(os.path.join(tmp_dir, scm_file), target_path) +class FetchUnsupportedError(Exception): + """Raised when git-fetch does not support getting particular commit by hash.""" + pass + + class GitWrapper(ScmBase): + @retry(interval=60, timeout=300, wait_on=RuntimeError) + def _try_fetch(self, repo, branch, destdir): + """Attempt to run git-fetch to get only the relevant commit. This + requires git >= 2.5. On unsupported versions it does not return any + error message (at least with 1.8.3). We can check that and fall back to + full clone then. + """ + try: + return run(["git", "fetch", "--depth=1", repo, branch], workdir=destdir) + except RuntimeError as exc: + if exc.output: + # We have some error message, so re-raise the error to retry. + raise + raise FetchUnsupportedError() + + def _clone(self, repo, branch, destdir): + """Get a single commit from a repository. + + We can't use git-archive as that does not support arbitrary hash as + commit, and git-clone can only get a branch too. Thus the workaround is + to create a new local repo, fetch the commit from remote and then check + it out. If that fails, we get a full clone. + + Finally the post-processing command is ran. + """ + if "://" not in repo: + repo = "file://%s" % repo + + run(["git", "init"], workdir=destdir) + try: + self._try_fetch(repo, branch, destdir) + run(["git", "checkout", "FETCH_HEAD"], workdir=destdir) + except FetchUnsupportedError: + # Fetch failed, to do a full clone we add a remote to our empty + # repo, get its content and check out the reference we want. + run(["git", "remote", "add", "origin", repo], workdir=destdir) + self.retry_run(["git", "remote", "update", "origin"], workdir=destdir) + run(["git", "checkout", branch], workdir=destdir) + + self.run_process_command(destdir) + def export_dir(self, scm_root, scm_dir, target_dir, scm_branch=None): scm_dir = scm_dir.lstrip("/") scm_branch = scm_branch or "master" with temp_dir() as tmp_dir: - if "://" not in scm_root: - scm_root = "file://%s" % scm_root - self.log_debug("Exporting directory %s from git %s (branch %s)..." % (scm_dir, scm_root, scm_branch)) - cmd = ("/usr/bin/git archive --remote=%s %s %s | tar xf -" - % (shlex_quote(scm_root), shlex_quote(scm_branch), shlex_quote(scm_dir))) - # git archive is not supported by http/https - # or by smart http https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP - if scm_root.startswith("http") or self.command: - cmd = ("/usr/bin/git clone --depth 1 --branch=%s %s %s" - % (shlex_quote(scm_branch), shlex_quote(scm_root), shlex_quote(tmp_dir))) - self.retry_run(cmd, workdir=tmp_dir, show_cmd=True) - self.run_process_command(tmp_dir) + + self._clone(scm_root, scm_branch, tmp_dir) copy_all(os.path.join(tmp_dir, scm_dir), target_dir) @@ -130,20 +166,10 @@ class GitWrapper(ScmBase): with temp_dir() as tmp_dir: target_path = os.path.join(target_dir, os.path.basename(scm_file)) - if "://" not in scm_root: - scm_root = "file://%s" % scm_root - self.log_debug("Exporting file %s from git %s (branch %s)..." % (scm_file, scm_root, scm_branch)) - cmd = ("/usr/bin/git archive --remote=%s %s %s | tar xf -" - % (shlex_quote(scm_root), shlex_quote(scm_branch), shlex_quote(scm_file))) - # git archive is not supported by http/https - # or by smart http https://git-scm.com/book/en/v2/Git-on-the-Server-Smart-HTTP - if scm_root.startswith("http") or self.command: - cmd = ("/usr/bin/git clone --depth 1 --branch=%s %s %s" - % (shlex_quote(scm_branch), shlex_quote(scm_root), shlex_quote(tmp_dir))) - self.retry_run(cmd, workdir=tmp_dir, show_cmd=True) - self.run_process_command(tmp_dir) + + self._clone(scm_root, scm_branch, tmp_dir) makedirs(target_dir) shutil.copy2(os.path.join(tmp_dir, scm_file), target_path) diff --git a/tests/test_scm.py b/tests/test_scm.py index 3ad54ede..0b75bc47 100644 --- a/tests/test_scm.py +++ b/tests/test_scm.py @@ -104,15 +104,23 @@ class FileSCMTestCase(SCMBaseTest): class GitSCMTestCase(SCMBaseTest): + def assertCalls(self, mock_run, url, branch, command=None): + command = [command] if command else [] + self.assertEqual( + [call[0][0] for call in mock_run.call_args_list], + [ + ["git", "init"], + ["git", "fetch", "--depth=1", url, branch], + ["git", "checkout", "FETCH_HEAD"], + ] + command, + ) + @mock.patch('pungi.wrappers.scm.run') def test_get_file(self, run): - commands = [] def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - fname = cmd.split('|')[0].strip().split(' ')[-1] - touch(os.path.join(workdir, fname)) - commands.append(cmd) + touch(os.path.join(workdir, 'some_file.txt')) + touch(os.path.join(workdir, 'other_file.txt')) run.side_effect = process @@ -121,19 +129,43 @@ class GitSCMTestCase(SCMBaseTest): 'file': 'some_file.txt'}, self.destdir) self.assertStructure(retval, ['some_file.txt']) - self.assertEqual( - commands, - ['/usr/bin/git archive --remote=git://example.com/git/repo.git master some_file.txt | tar xf -']) + self.assertCalls(run, "git://example.com/git/repo.git", "master") @mock.patch('pungi.wrappers.scm.run') - def test_get_file_generated_by_command_via_git(self, run): - commands = [] + def test_get_file_fetch_fails(self, run): + url = "git://example.com/git/repo.git" def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'some_file.txt')) - commands.append(cmd) + if "fetch" in cmd: + exc = RuntimeError() + exc.output = "" + raise exc + touch(os.path.join(workdir, 'some_file.txt')) + touch(os.path.join(workdir, 'other_file.txt')) + + run.side_effect = process + + retval = scm.get_file_from_scm( + {"scm": "git", "repo": url, "file": "some_file.txt"}, self.destdir + ) + self.assertStructure(retval, ['some_file.txt']) + self.assertEqual( + [call[0][0] for call in run.call_args_list], + [ + ["git", "init"], + ["git", "fetch", "--depth=1", url, "master"], + ["git", "remote", "add", "origin", url], + ["git", "remote", "update", "origin"], + ["git", "checkout", "master"], + ], + ) + + @mock.patch('pungi.wrappers.scm.run') + def test_get_file_generated_by_command(self, run): + + def process(cmd, workdir=None, **kwargs): + if cmd[0] == "git": + touch(os.path.join(workdir, 'some_file.txt')) return 0, '' run.side_effect = process @@ -144,21 +176,16 @@ class GitSCMTestCase(SCMBaseTest): 'command': 'make'}, self.destdir) self.assertStructure(retval, ['some_file.txt']) - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master git://example.com/git/repo.git /tmp/.+') - self.assertEqual(commands[1:], ['make']) + self.assertCalls(run, "git://example.com/git/repo.git", "master", "make") @mock.patch('pungi.wrappers.scm.run') def test_get_file_and_fail_to_generate(self, run): - commands = [] def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'some_file.txt')) - commands.append(cmd) - return len(commands), 'output' + if cmd[0] == "git": + touch(os.path.join(workdir, 'some_file.txt')) + return 0, "output" + return 1, "output" run.side_effect = process @@ -169,63 +196,14 @@ class GitSCMTestCase(SCMBaseTest): 'command': 'make'}, self.destdir) - self.assertEqual(str(ctx.exception), "'make' failed with exit code 2") - - @mock.patch('pungi.wrappers.scm.run') - def test_get_file_generated_by_command_via_https(self, run): - commands = [] - - def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'some_file.txt')) - commands.append(cmd) - return 0, '' - - run.side_effect = process - - retval = scm.get_file_from_scm({'scm': 'git', - 'repo': 'https://example.com/git/repo.git', - 'file': 'some_file.txt', - 'command': 'make'}, - self.destdir) - self.assertStructure(retval, ['some_file.txt']) - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+') - self.assertEqual(commands[1:], ['make']) - - @mock.patch('pungi.wrappers.scm.run') - def test_get_file_via_https(self, run): - commands = [] - - def process(cmd, workdir=None, **kwargs): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'some_file.txt')) - touch(os.path.join(checkout, 'other_file.txt')) - commands.append(cmd) - - run.side_effect = process - - retval = scm.get_file_from_scm({'scm': 'git', - 'repo': 'https://example.com/git/repo.git', - 'file': 'some_file.txt'}, - self.destdir) - self.assertStructure(retval, ['some_file.txt']) - self.assertEqual(1, len(commands)) - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+') + self.assertEqual(str(ctx.exception), "'make' failed with exit code 1") @mock.patch('pungi.wrappers.scm.run') def test_get_dir(self, run): - commands = [] def process(cmd, workdir=None, **kwargs): - fname = cmd.split('|')[0].strip().split(' ')[-1] - touch(os.path.join(workdir, fname, 'first')) - touch(os.path.join(workdir, fname, 'second')) - commands.append(cmd) + touch(os.path.join(workdir, "subdir", 'first')) + touch(os.path.join(workdir, "subdir", 'second')) run.side_effect = process @@ -234,21 +212,15 @@ class GitSCMTestCase(SCMBaseTest): 'dir': 'subdir'}, self.destdir) self.assertStructure(retval, ['first', 'second']) - - self.assertEqual( - commands, - ['/usr/bin/git archive --remote=git://example.com/git/repo.git master subdir | tar xf -']) + self.assertCalls(run, "git://example.com/git/repo.git", "master") @mock.patch('pungi.wrappers.scm.run') - def test_get_dir_via_git_and_generate(self, run): - commands = [] + def test_get_dir_and_generate(self, run): def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'subdir', 'first')) - touch(os.path.join(checkout, 'subdir', 'second')) - commands.append(cmd) + if cmd[0] == "git": + touch(os.path.join(workdir, 'subdir', 'first')) + touch(os.path.join(workdir, 'subdir', 'second')) return 0, '' run.side_effect = process @@ -259,59 +231,7 @@ class GitSCMTestCase(SCMBaseTest): 'command': 'make'}, self.destdir) self.assertStructure(retval, ['first', 'second']) - - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master git://example.com/git/repo.git /tmp/.+') - self.assertEqual(commands[1:], ['make']) - - @mock.patch('pungi.wrappers.scm.run') - def test_get_dir_via_https(self, run): - commands = [] - - def process(cmd, workdir=None, **kwargs): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'subdir', 'first')) - touch(os.path.join(checkout, 'subdir', 'second')) - commands.append(cmd) - - run.side_effect = process - - retval = scm.get_dir_from_scm({'scm': 'git', - 'repo': 'https://example.com/git/repo.git', - 'dir': 'subdir'}, - self.destdir) - self.assertStructure(retval, ['first', 'second']) - - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+') - - @mock.patch('pungi.wrappers.scm.run') - def test_get_dir_via_https_and_generate(self, run): - commands = [] - - def process(cmd, workdir=None, **kwargs): - if cmd.startswith('/usr/bin/git'): - checkout = cmd.split(' ')[-1] - touch(os.path.join(checkout, 'subdir', 'first')) - touch(os.path.join(checkout, 'subdir', 'second')) - commands.append(cmd) - return 0, '' - - run.side_effect = process - - retval = scm.get_dir_from_scm({'scm': 'git', - 'repo': 'https://example.com/git/repo.git', - 'dir': 'subdir', - 'command': 'make'}, - self.destdir) - self.assertStructure(retval, ['first', 'second']) - - self.assertRegexpMatches( - commands[0], - r'/usr/bin/git clone --depth 1 --branch=master https://example.com/git/repo.git /tmp/.+') - self.assertEqual(commands[1:], ['make']) + self.assertCalls(run, "git://example.com/git/repo.git", "master", "make") class RpmSCMTestCase(SCMBaseTest):