--- a/setuptools/package_index.py 1980-01-01 09:00:00.000000000 +0100 +++ b/setuptools/package_index.py 2024-07-25 10:11:40.537307665 +0200 @@ -1,5 +1,6 @@ """PyPI and direct package downloading""" import sys +import subprocess import os import re import shutil @@ -563,7 +564,7 @@ scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir - found = self._download_url(scheme.group(1), spec, tmpdir) + found = self._download_url(spec, tmpdir) base, fragment = egg_info_for_url(spec) if base.endswith('.py'): found = self.gen_setup(found, fragment, tmpdir) @@ -775,7 +776,7 @@ raise DistutilsError("Download error for %s: %s" % (url, v)) - def _download_url(self, scheme, url, tmpdir): + def _download_url(self, url, tmpdir): # Determine download filename # name, fragment = egg_info_for_url(url) @@ -790,19 +791,59 @@ filename = os.path.join(tmpdir, name) - # Download the file - # - if scheme == 'svn' or scheme.startswith('svn+'): - return self._download_svn(url, filename) - elif scheme == 'git' or scheme.startswith('git+'): - return self._download_git(url, filename) - elif scheme.startswith('hg+'): - return self._download_hg(url, filename) - elif scheme == 'file': - return urllib.request.url2pathname(urllib.parse.urlparse(url)[2]) - else: - self.url_ok(url, True) # raises error if not allowed - return self._attempt_download(url, filename) + return self._download_vcs(url, filename) or self._download_other(url, filename) + + @staticmethod + def _resolve_vcs(url): + """ + >>> rvcs = PackageIndex._resolve_vcs + >>> rvcs('git+http://foo/bar') + 'git' + >>> rvcs('hg+https://foo/bar') + 'hg' + >>> rvcs('git:myhost') + 'git' + >>> rvcs('hg:myhost') + >>> rvcs('http://foo/bar') + """ + scheme = urllib.parse.urlsplit(url).scheme + pre, sep, post = scheme.partition('+') + # svn and git have their own protocol; hg does not + allowed = set(['svn', 'git'] + ['hg'] * bool(sep)) + return next(iter({pre} & allowed), None) + + def _download_vcs(self, url, spec_filename): + vcs = self._resolve_vcs(url) + if not vcs: + return + if vcs == 'svn': + raise DistutilsError( + f"Invalid config, SVN download is not supported: {url}" + ) + + filename, _, _ = spec_filename.partition('#') + url, rev = self._vcs_split_rev_from_url(url) + + self.info(f"Doing {vcs} clone from {url} to {filename}") + subprocess.check_call([vcs, 'clone', '--quiet', url, filename]) + + co_commands = dict( + git=[vcs, '-C', filename, 'checkout', '--quiet', rev], + hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'], + ) + if rev is not None: + self.info(f"Checking out {rev}") + subprocess.check_call(co_commands[vcs]) + + return filename + + def _download_other(self, url, filename): + scheme = urllib.parse.urlsplit(url).scheme + if scheme == 'file': # pragma: no cover + return urllib.request.url2pathname(urllib.parse.urlparse(url).path) + # raise error if not allowed + self.url_ok(url, True) + return self._attempt_download(url, filename) def scan_url(self, url): self.process_url(url, True) @@ -829,76 +870,37 @@ os.unlink(filename) raise DistutilsError("Unexpected HTML page found at " + url) - def _download_svn(self, url, filename): - url = url.split('#', 1)[0] # remove any fragment for svn's sake - creds = '' - if url.lower().startswith('svn:') and '@' in url: - scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) - if not netloc and path.startswith('//') and '/' in path[2:]: - netloc, path = path[2:].split('/', 1) - auth, host = splituser(netloc) - if auth: - if ':' in auth: - user, pw = auth.split(':', 1) - creds = " --username=%s --password=%s" % (user, pw) - else: - creds = " --username=" + auth - netloc = host - parts = scheme, netloc, url, p, q, f - url = urllib.parse.urlunparse(parts) - self.info("Doing subversion checkout from %s to %s", url, filename) - os.system("svn checkout%s -q %s %s" % (creds, url, filename)) - return filename - @staticmethod - def _vcs_split_rev_from_url(url, pop_prefix=False): - scheme, netloc, path, query, frag = urllib.parse.urlsplit(url) - - scheme = scheme.split('+', 1)[-1] - - # Some fragment identification fails - path = path.split('#', 1)[0] - - rev = None - if '@' in path: - path, rev = path.rsplit('@', 1) - - # Also, discard fragment - url = urllib.parse.urlunsplit((scheme, netloc, path, query, '')) - - return url, rev - - def _download_git(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) - - self.info("Doing git clone from %s to %s", url, filename) - os.system("git clone --quiet %s %s" % (url, filename)) + def _vcs_split_rev_from_url(url): + """ + Given a possible VCS URL, return a clean URL and resolved revision if any. - if rev is not None: - self.info("Checking out %s", rev) - os.system("(cd %s && git checkout --quiet %s)" % ( - filename, - rev, - )) + >>> vsrfu = PackageIndex._vcs_split_rev_from_url + >>> vsrfu('git+https://github.com/pypa/setuptools@v69.0.0#egg-info=setuptools') + ('https://github.com/pypa/setuptools', 'v69.0.0') + >>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools') + ('https://github.com/pypa/setuptools', None) + >>> vsrfu('http://foo/bar') + ('http://foo/bar', None) + """ + parts = urllib.parse.urlsplit(url) - return filename + clean_scheme = parts.scheme.split('+', 1)[-1] - def _download_hg(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + # Some fragment identification fails + no_fragment_path, _, _ = parts.path.partition('#') - self.info("Doing hg clone from %s to %s", url, filename) - os.system("hg clone --quiet %s %s" % (url, filename)) + pre, sep, post = no_fragment_path.rpartition('@') + clean_path, rev = (pre, post) if sep else (post, None) - if rev is not None: - self.info("Updating to %s", rev) - os.system("(cd %s && hg up -C -r %s >&-)" % ( - filename, - rev, - )) + resolved = parts._replace( + scheme=clean_scheme, + path=clean_path, + # discard the fragment + fragment='', + ).geturl() - return filename + return resolved, rev def debug(self, msg, *args): log.debug(msg, *args)