Split requirements.txt parsing to its own module; test & improve it

Related: rhbz#1950291
This commit is contained in:
Petr Viktorin 2021-07-23 08:00:01 +00:00 committed by Miro Hrončok
parent f190b5b225
commit a3a1caf32a
6 changed files with 268 additions and 54 deletions

View File

@ -23,10 +23,12 @@ Source102: pyproject_save_files.py
Source103: pyproject_convert.py
Source104: pyproject_preprocess_record.py
Source105: pyproject_construct_toxenv.py
Source106: pyproject_requirements_txt.py
# Tests
Source201: test_pyproject_buildrequires.py
Source202: test_pyproject_save_files.py
Source203: test_pyproject_requirements_txt.py
# Test data
Source301: pyproject_buildrequires_testcases.yaml
@ -95,6 +97,7 @@ install -m 644 pyproject_convert.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_save_files.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_preprocess_record.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_construct_toxenv.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_requirements_txt.py %{buildroot}%{_rpmconfigdir}/redhat/
%if %{with tests}
%check
@ -110,6 +113,7 @@ export HOSTNAME="rpmbuild" # to speedup tox in network-less mock, see rhbz#1856
%{_rpmconfigdir}/redhat/pyproject_save_files.py
%{_rpmconfigdir}/redhat/pyproject_preprocess_record.py
%{_rpmconfigdir}/redhat/pyproject_construct_toxenv.py
%{_rpmconfigdir}/redhat/pyproject_requirements_txt.py
%doc README.md
%license LICENSE

View File

@ -11,16 +11,14 @@ import re
import tempfile
import email.parser
import pathlib
import urllib
from pyproject_requirements_txt import convert_requirements_txt
# Some valid Python version specifiers are not supported.
# Allow only the forms we know we can handle.
VERSION_RE = re.compile(r'[a-zA-Z0-9.-]+(\.\*)?')
# We treat this as comment in requirements files, as does pip
COMMENT_RE = re.compile(r'(^|\s+)#.*$')
class EndPass(Exception):
"""End current pass of generating requirements"""
@ -54,21 +52,16 @@ def hook_call():
print_err('HOOK STDOUT:', line)
def pkgname_from_egg_fragment(requirement_str):
    """Return the package name given in a URL's ``#egg=NAME`` fragment.

    requirement_str is parsed as a URL and its fragment is decoded like a
    query string.  The first ``egg`` value is returned; None is returned
    when the fragment carries no ``egg`` key.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    import urllib.parse

    fragment = urllib.parse.urlparse(requirement_str).fragment
    egg_values = urllib.parse.parse_qs(fragment).get('egg')
    if egg_values:
        return egg_values[0]
    return None
def guess_reason_for_invalid_requirement(requirement_str):
if ':' in requirement_str:
return (
message = (
'It might be an URL. '
'%pyproject_buildrequires cannot handle all URL-based requirements. '
'Add PackageName@ (see PEP 508) to the URL to at least require any version of PackageName.'
)
if '@' in requirement_str:
message += ' (but note that URLs might not work well with other features)'
return message
if '/' in requirement_str:
return (
'It might be a local path. '
@ -110,22 +103,18 @@ class Requirements:
return True
return False
def add(self, requirement_str, *, source=None, allow_egg_pkgname=False):
def add(self, requirement_str, *, source=None):
"""Output a Python-style requirement string as RPM dep"""
print_err(f'Handling {requirement_str} from {source}')
try:
requirement = Requirement(requirement_str)
except InvalidRequirement:
if allow_egg_pkgname and (egg_name := pkgname_from_egg_fragment(requirement_str)):
requirement = Requirement(egg_name)
requirement.url = requirement_str
else:
hint = guess_reason_for_invalid_requirement(requirement_str)
message = f'Requirement {requirement_str!r} from {source} is invalid.'
if hint:
message += f' Hint: {hint}'
raise ValueError(message)
hint = guess_reason_for_invalid_requirement(requirement_str)
message = f'Requirement {requirement_str!r} from {source} is invalid.'
if hint:
message += f' Hint: {hint}'
raise ValueError(message)
if requirement.url:
print_err(
@ -276,27 +265,6 @@ def generate_run_requirements(backend, requirements):
requirements.extend(requires, source=f'wheel metadata: {key}')
def parse_requirements_lines(lines, path=None):
    """Collect requirement strings from the lines of a requirements file.

    Comments and surrounding whitespace are removed from every line.
    A ``-r`` line makes the named file be read and parsed recursively;
    when *path* is given, the included file is looked up next to it.
    Any other option line is skipped with a warning.

    Returns the list of remaining (non-empty) lines.
    """
    collected = []
    for raw_line in lines:
        entry = COMMENT_RE.sub('', raw_line).strip()
        if not entry:
            continue
        if entry.startswith('-r'):
            included = entry[2:].strip()
            if path:
                included = path.parent / included
            with open(included) as included_file:
                nested_lines = included_file.read().splitlines()
            collected.extend(parse_requirements_lines(nested_lines, included))
        elif entry.startswith('-'):
            print_err(
                f'WARNING: Skipping dependency line: {entry}\n'
                + f' tox deps options other than -r are not supported (yet).',
            )
        else:
            collected.append(entry)
    return collected
def generate_tox_requirements(toxenv, requirements):
toxenv = ','.join(toxenv)
requirements.add('tox-current-env >= 0.0.6', source='tox itself')
@ -335,7 +303,7 @@ def generate_tox_requirements(toxenv, requirements):
r.check_returncode()
deplines = deps.read().splitlines()
packages = parse_requirements_lines(deplines)
packages = convert_requirements_txt(deplines)
requirements.add_extras(*extras.read().splitlines())
requirements.extend(packages,
source=f'tox --print-deps-only: {toxenv}')
@ -372,11 +340,10 @@ def generate_requires(
raise ValueError('-N option cannot be used in combination with -r, -e, -t, -x options')
if requirement_files:
for req_file in requirement_files:
lines = req_file.read().splitlines()
packages = parse_requirements_lines(lines, pathlib.Path(req_file.name))
requirements.extend(packages,
source=f'requirements file {req_file.name}',
allow_egg_pkgname=True)
requirements.extend(
convert_requirements_txt(req_file, pathlib.Path(req_file.name)),
source=f'requirments file {req_file.name}'
)
requirements.check(source='all requirement files')
if use_build_system:
backend = get_backend(requirements)

View File

@ -603,7 +603,7 @@ With pyproject.toml, requirements file and with -N option:
python3dist(paramiko)
python3dist(sqlalchemy)
python3dist(spam)
stderr_contains: "WARNING: Simplifying 'git+https://github.com/monty/spam.git@master#egg=spam' to 'spam'."
stderr_contains: "WARNING: Simplifying 'spam@git+https://github.com/monty/spam.git@master#egg=spam' to 'spam'."
result: 0
With pyproject.toml, requirements file and without -N option:
@ -662,3 +662,57 @@ Value error if -N and -e arguments are present:
- py3
use_build_system: false
except: ValueError
Weird and complex requirements file:
installed:
setuptools: 50
wheel: 1
setup.py: |
from setuptools import setup
setup(
name='test',
version='0.1',
)
requirements.txt: |
Normal_Req ~= 1.2.0
good@git+https://github.com/monty/spam.git@master#egg=bad
git+https://github.com/monty/spam.git@master#egg=ugly
this-name-is-too-\
long-for-this-file<\
=30 # even names and operators can be split
# this is not a multi-line comment \
some-dep
other-dep # but this *is* a multi-line coment \
so any garbage can be here
dep-a # and this comment ends with the blank line below \
dep-b
-r requirements2.txt
${PACKAGE}${WANTED_VERSION}
requirements2.txt: |
dep-from-included-file
requirement_files:
- requirements.txt
environ:
PACKAGE: package
WANTED_VERSION: -from-environ >= 1.2.3
expected: |
(python3dist(normal-req) >= 1.2 with python3dist(normal-req) < 1.3)
python3dist(good)
python3dist(ugly)
python3dist(this-name-is-too-long-for-this-file) <= 30
python3dist(some-dep)
python3dist(other-dep)
python3dist(dep-a)
python3dist(dep-b)
python3dist(dep-from-included-file)
python3dist(package-from-environ) >= 1.2.3
stderr_contains:
- "WARNING: Simplifying 'good@git+https://github.com/monty/spam.git@master#egg=bad' to 'good'."
# XXX: pyproject_requirements_txt adds a prefix that's not actually in the source;
# but that's good enough:
- "WARNING: Simplifying 'ugly@git+https://github.com/monty/spam.git@master#egg=ugly' to 'ugly'."
result: 0

View File

@ -0,0 +1,103 @@
"""Best-effort parser for requirements.txt files"""
import urllib.parse
from pathlib import Path
import sys
import os
import re
# `#` starts a comment only at end of line and after whitespace
COMMENT_RE = re.compile(r'(^|\s+)#.*$')
# Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
# The requirements.txt format evolved rather organically; expect weirdness.
def convert_requirements_txt(lines, path: Path = None):
    """Turn the lines of a requirements file into PEP 508-style requirement strings.

    This is a best-effort conversion: only pip itself understands every
    requirements.txt feature.  The returned strings are not validated, so
    some of them may be invalid (because the file is wrong, or because a
    special case is not covered here).

    lines is an iterable of the file's lines.
    path is the path to the requirements file; `-r` includes are resolved
    relative to its parent directory.

    Returns a list of requirement strings.
    Raises ValueError for any option line other than `-r`.
    """
    source_name = path.name if path else '<requirements file>'
    converted = []

    preprocessed = expand_env_vars(strip_comments(combine_logical_lines(lines)))
    for entry in preprocessed:
        if URL_START_RE.match(entry):
            # URLs may name their project in an "egg=..." fragment,
            # see https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
            fragment = urllib.parse.urlparse(entry).fragment
            egg_values = urllib.parse.parse_qs(fragment).get('egg')
            name_match = PKGNAME_RE.match(egg_values[0]) if egg_values else None
            if name_match:
                # Prepend the package name to the URL.
                entry = f'{name_match[0]}@{entry}'
            # Without a usable name the line is passed on unchanged;
            # the caller will deal with invalid requirements.
            converted.append(entry)
            continue

        if not entry.startswith('-'):
            converted.append(entry)
            continue

        if not entry.startswith('-r'):
            raise ValueError(f'{source_name}: unsupported requirements file option: {entry}')

        # `-r FILE`: splice in the requirements of the included file.
        target = entry[2:].strip()
        included_path = Path(path.parent / target) if path else Path(target)
        with included_path.open() as included_file:
            converted.extend(convert_requirements_txt(included_file, included_path))

    return converted
def combine_logical_lines(lines):
    """Join physical lines that end with a backslash into logical lines.

    Yields one string per logical line.  Whatever is still pending when
    the input ends is yielded as a final item (an empty string if nothing
    is pending).
    """
    pending = ''
    for physical in lines:
        text = physical.rstrip('\n')
        # Only whole-line comments are dropped before joining continuations.
        if COMMENT_RE.match(text):
            continue
        if not text.endswith('\\'):
            # Trailing whitespace is removed from complete logical lines only.
            yield pending + text.rstrip()
            pending = ''
        else:
            pending += text[:-1]
    yield pending
def strip_comments(lines):
    """Yield each line without its comment and surrounding whitespace.

    Lines that end up empty are not yielded.
    """
    for raw in lines:
        comment = COMMENT_RE.search(raw)
        text = raw if comment is None else raw[:comment.start()]
        text = text.strip()
        if text:
            yield text
def expand_env_vars(lines):
    """Yield each line with ``${NAME}`` references replaced from the environment.

    Only references matched by ENV_VAR_RE are considered.  A reference to
    a variable that is not set is left in the line unchanged.
    """
    def substitute(match):
        # Fall back to the literal "${NAME}" text when the variable is unset.
        return os.getenv(match['name'], match['var'])

    for line in lines:
        yield ENV_VAR_RE.sub(substitute, line)

View File

@ -1,6 +1,5 @@
from pathlib import Path
import importlib.metadata
import io
import pytest
import yaml
@ -29,6 +28,9 @@ def test_data(case_name, capsys, tmp_path, monkeypatch):
if filename.endswith(file_types):
cwd.joinpath(filename).write_text(case[filename])
for name, value in case.get('environ', {}).items():
monkeypatch.setenv(name, value)
def get_installed_version(dist_name):
try:
return str(case['installed'][dist_name])
@ -65,8 +67,14 @@ def test_data(case_name, capsys, tmp_path, monkeypatch):
if 'expected' in case:
assert out == case['expected']
if 'stderr_contains' in case:
assert case['stderr_contains'] in err
# stderr_contains may be a string or list of strings
stderr_contains = case.get('stderr_contains')
if stderr_contains is not None:
if isinstance(stderr_contains, str):
stderr_contains = [stderr_contains]
for expected_substring in stderr_contains:
assert expected_substring in err
finally:
for req in requirement_files:
req.close()

View File

@ -0,0 +1,78 @@
from pathlib import Path
from textwrap import dedent
from pyproject_requirements_txt import convert_requirements_txt
def test_requirements_add_pkgname():
    """URL lines get the name from their "egg=" fragment prepended.

    A requirement that does not start with a URL scheme is passed through
    unchanged.
    """
    requirement_lines = dedent(r"""
good@git+https://github.com/monty/spam.git@master#egg=bad
git+https://github.com/monty/spam.git@master#egg=ugly
https://example.com/undead.tar.gz#egg=undead ; python_version > 3.0
""").splitlines()

    converted = convert_requirements_txt(requirement_lines)

    assert converted == [
        'good@git+https://github.com/monty/spam.git@master#egg=bad',
        'ugly@git+https://github.com/monty/spam.git@master#egg=ugly',
        'undead@https://example.com/undead.tar.gz#egg=undead ; python_version > 3.0',
    ]
def test_requirements_preprocess(monkeypatch):
    """Check line joining, comment stripping and env-var expansion.

    The converted lines are compared with a hand-written expectation and
    then with the output of pip's internal `preprocess` function.
    """
    # `<END>` is removed from the text before it is parsed
    # (presumably it only protects trailing whitespace in this file).
    reqs_txt = dedent(r"""
Normal_Req ~= 1.2.0
whitespace-stripped < 3 <END>
# indentation is preserved in continuations:
foo <=\
30
bar<= \
30
# names and operators can be split:
this-was-\
too-long<\
=30
# this is not a multi-line comment \
some-dep
# neither is this \
other-dep
another-dep # but this *is* a multi-line coment \
so any garbage can be here
dep-a # and this comment ends with the blank line below \
dep-b
${ENVVAR}
whitespace-stripped-before-substitution ${SPACE}
${MISSING_ENVVAR}
""".replace('<END>', ''))
    # Set up the environment before converting: variables are read
    # from os.environ while the lines are processed.
    monkeypatch.setenv('ENVVAR', 'package-from-env')
    monkeypatch.setenv('SPACE', ' ')
    monkeypatch.delenv('MISSING_ENVVAR', raising=False)
    result = convert_requirements_txt(reqs_txt.splitlines())
    expected = [
        'Normal_Req ~= 1.2.0',
        'whitespace-stripped < 3',
        'foo <= 30',
        'bar<= 30',
        'this-was-too-long<=30',
        'some-dep',
        'other-dep',
        'another-dep',
        'dep-a',
        'dep-b',
        'package-from-env',
        'whitespace-stripped-before-substitution ',
        '${MISSING_ENVVAR}',
    ]
    #result = expected
    assert result == expected
    # This test uses pip internals, so it might break in the future.
    from pip._internal.req.req_file import preprocess
    # pip's preprocess yields (line number, line) pairs; only the text is compared.
    expected = [line for lineno, line in preprocess(reqs_txt)]
    assert result == expected