2021-07-22 10:01:48 +00:00
|
|
|
"""Best-effort parser for requirements.txt files"""
|
|
|
|
|
|
|
|
import urllib.parse
|
|
|
|
from pathlib import Path
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
|
|
|
|
# `#` starts a comment only at the start of a line or after whitespace;
# the comment then runs to the end of the line.
# The capture group holds the whitespace (if any) that precedes the `#`.
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')

# `${NAME}` environment variable reference, where NAME consists of uppercase
# ASCII letters, digits and underscores.
# Group `var` is the whole `${NAME}` text, group `name` is just NAME.
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')

# Leading run of ASCII letters, digits, `-` and `_`; used to pick the package
# name off the start of an "egg=" fragment value.
PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
|
|
|
|
|
|
|
|
# The requirements.txt format evolved rather organically; expect weirdness.
|
|
|
|
|
|
|
|
def _url_requirement(line):
    """Return `name@url` for a URL with a usable "egg=" fragment, else the URL.

    See https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
    """
    parsed_url = urllib.parse.urlparse(line)
    parsed_fragment = urllib.parse.parse_qs(parsed_url.fragment)
    if 'egg' in parsed_fragment:
        # Prepend the package name to the URL.
        match = PKGNAME_RE.match(parsed_fragment['egg'][0])
        if match:
            return f'{match[0]}@{line}'
    # If that didn't work, pass the line on;
    # the caller will deal with invalid requirements
    return line


def _included_file(line):
    """Return the file named by a `-r`/`--requirement` line, or None."""
    long_option = '--requirement'
    if line.startswith(long_option):
        rest = line[len(long_option):]
        # The long form needs a separator (`--requirement FILE` or
        # `--requirement=FILE`); anything else is a different option.
        if rest[:1] == '=':
            return rest[1:].strip()
        if rest[:1].isspace():
            return rest.strip()
        return None
    if line.startswith('-r'):
        return line[2:].strip()
    return None


def convert_requirements_txt(lines, path: Path = None):
    """Convert lines of a requirements file to PEP 440-style requirement strs

    This does NOT handle all of requirements.txt features (only pip can do
    that), but tries its best.

    The resulting requirements might not actually be valid (either because
    they're wrong in the file, or because we missed a special case).

    lines is an iterable of the file's physical lines. Line continuations,
    comments and `${VAR}` references are processed before conversion.

    path is the path to the requirements.txt file, used for options like `-r`.
    Files included with `-r FILE`, `--requirement FILE` or
    `--requirement=FILE` are opened and converted recursively; a relative
    FILE is resolved against the directory of path when path is given.

    Returns a list of requirement strings.

    Raises ValueError for any other line that starts with `-`.
    """
    filename = path.name if path else '<requirements file>'
    requirements = []
    lines = combine_logical_lines(lines)
    lines = strip_comments(lines)
    lines = expand_env_vars(lines)
    for line in lines:
        if URL_START_RE.match(line):
            requirements.append(_url_requirement(line))
            continue
        included = _included_file(line)
        if included is not None:
            if path:
                included = path.parent / included
            included = Path(included)
            with included.open() as f:
                requirements.extend(convert_requirements_txt(f, included))
        elif line.startswith('-'):
            raise ValueError(f'{filename}: unsupported requirements file option: {line}')
        else:
            requirements.append(line)
    return requirements
|
|
|
|
|
|
|
|
def combine_logical_lines(lines):
    """Join backslash-continued physical lines into logical lines.

    Yields one string per logical line. After the input is exhausted, one
    more string is yielded holding whatever continuation was still pending
    (the empty string when there was none).
    """
    pending = ''
    for raw in lines:
        text = raw.rstrip('\n')
        # Only whole-line comments are dropped before line continuation
        if COMMENT_RE.match(text) is not None:
            continue
        if text.endswith('\\'):
            # Continued: keep everything but the backslash and carry on
            pending += text[:-1]
            continue
        # Trailing whitespace is stripped only at the end of a logical line
        yield pending + text.rstrip()
        pending = ''
    yield pending
|
|
|
|
|
|
|
|
|
|
|
|
def strip_comments(lines):
    """Yield each line minus its comment, skipping lines left empty.

    The text before the first COMMENT_RE match is kept and stripped of
    surrounding whitespace.
    """
    for raw in lines:
        content = COMMENT_RE.split(raw, maxsplit=1)[0].strip()
        if content:
            yield content
|
|
|
|
|
|
|
|
|
|
|
|
def expand_env_vars(lines):
    """Yield each line with `${NAME}` references expanded from the environment.

    Only references matched by ENV_VAR_RE are considered. A reference to a
    variable that is not set is left in the line unchanged.
    """
    def repl(match):
        # Fall back to the literal `${NAME}` text when the variable is unset
        return os.getenv(match['name'], match['var'])

    for line in lines:
        yield ENV_VAR_RE.sub(repl, line)
|