Split requirements.txt parsing to its own module; test & improve it

Related: rhbz#1950291
2021-07-23 08:00:01 +00:00 · 2021-07-23 08:00:01 +00:00 · a3a1caf32a
commit a3a1caf32a
parent f190b5b225
6 changed files with 268 additions and 54 deletions
--- a/pyproject-rpm-macros.spec
+++ b/pyproject-rpm-macros.spec
@ -23,10 +23,12 @@ Source102:      pyproject_save_files.py
 Source103:      pyproject_convert.py
 Source104:      pyproject_preprocess_record.py
 Source105:      pyproject_construct_toxenv.py
 Source106:      pyproject_requirements_txt.py
 # Tests
 Source201:      test_pyproject_buildrequires.py
 Source202:      test_pyproject_save_files.py
 Source203:      test_pyproject_requirements_txt.py
 # Test data
 Source301:      pyproject_buildrequires_testcases.yaml
@ -95,6 +97,7 @@ install -m 644 pyproject_convert.py %{buildroot}%{_rpmconfigdir}/redhat/
 install -m 644 pyproject_save_files.py  %{buildroot}%{_rpmconfigdir}/redhat/
 install -m 644 pyproject_preprocess_record.py %{buildroot}%{_rpmconfigdir}/redhat/
 install -m 644 pyproject_construct_toxenv.py %{buildroot}%{_rpmconfigdir}/redhat/
 install -m 644 pyproject_requirements_txt.py %{buildroot}%{_rpmconfigdir}/redhat/
 %if %{with tests}
 %check
@ -110,6 +113,7 @@ export HOSTNAME="rpmbuild"  # to speedup tox in network-less mock, see rhbz#1856
 %{_rpmconfigdir}/redhat/pyproject_save_files.py
 %{_rpmconfigdir}/redhat/pyproject_preprocess_record.py
 %{_rpmconfigdir}/redhat/pyproject_construct_toxenv.py
 %{_rpmconfigdir}/redhat/pyproject_requirements_txt.py
 %doc README.md
 %license LICENSE
--- a/pyproject_buildrequires.py
+++ b/pyproject_buildrequires.py
@ -11,16 +11,14 @@ import re
 import tempfile
 import email.parser
 import pathlib
-import urllib
+
 from pyproject_requirements_txt import convert_requirements_txt
 # Some valid Python version specifiers are not supported.
 # Allow only the forms we know we can handle.
 VERSION_RE = re.compile(r'[a-zA-Z0-9.-]+(\.\*)?')
 # We treat this as comment in requirements files, as does pip
 COMMENT_RE = re.compile(r'(^|\s+)#.*$')
 class EndPass(Exception):
    """End current pass of generating requirements"""
@ -54,21 +52,16 @@ def hook_call():
        print_err('HOOK STDOUT:', line)
 def pkgname_from_egg_fragment(requirement_str):
    """Return the package name given by an ``egg=`` URL fragment, if any

    requirement_str is parsed as a URL and its fragment as a query string;
    the first ``egg`` value is returned, or None when there is none.
    """
    fragment = urllib.parse.urlparse(requirement_str).fragment
    egg_values = urllib.parse.parse_qs(fragment).get('egg')
    if egg_values is None:
        return None
    return egg_values[0]
 def guess_reason_for_invalid_requirement(requirement_str):
    if ':' in requirement_str:
-        return (
+        message = (
            'It might be an URL. '
            '%pyproject_buildrequires cannot handle all URL-based requirements. '
            'Add PackageName@ (see PEP 508) to the URL to at least require any version of PackageName.'
        )
        if '@' in requirement_str:
            message += ' (but note that URLs might not work well with other features)'
        return message
    if '/' in requirement_str:
        return (
            'It might be a local path. '
@ -110,17 +103,13 @@ class Requirements:
                return True
        return False
-    def add(self, requirement_str, *, source=None, allow_egg_pkgname=False):
+    def add(self, requirement_str, *, source=None):
        """Output a Python-style requirement string as RPM dep"""
        print_err(f'Handling {requirement_str} from {source}')
        try:
            requirement = Requirement(requirement_str)
        except InvalidRequirement:
            if allow_egg_pkgname and (egg_name := pkgname_from_egg_fragment(requirement_str)):
                requirement = Requirement(egg_name)
                requirement.url = requirement_str
            else:
            hint = guess_reason_for_invalid_requirement(requirement_str)
            message = f'Requirement {requirement_str!r} from {source} is invalid.'
            if hint:
@ -276,27 +265,6 @@ def generate_run_requirements(backend, requirements):
        requirements.extend(requires, source=f'wheel metadata: {key}')
 def parse_requirements_lines(lines, path=None):
    """Collect requirement strings from the lines of a requirements file

    Comments and surrounding whitespace are removed and empty lines dropped.
    A `-r FILE` line is replaced by the requirements parsed from FILE
    (looked up next to `path` when `path` is given).  Any other line that
    starts with `-` is skipped and reported through print_err.

    Returns the list of remaining lines.
    """
    collected = []
    for raw_line in lines:
        entry = COMMENT_RE.sub('', raw_line).strip()
        if not entry:
            continue
        if entry.startswith('-r'):
            included = entry[2:].strip()
            if path:
                included = path.parent / included
            with open(included) as included_file:
                nested = parse_requirements_lines(
                    included_file.read().splitlines(), included
                )
                collected.extend(nested)
        elif entry.startswith('-'):
            print_err(
                f'WARNING: Skipping dependency line: {entry}\n'
                '    tox deps options other than -r are not supported (yet).',
            )
        else:
            collected.append(entry)
    return collected
 def generate_tox_requirements(toxenv, requirements):
    toxenv = ','.join(toxenv)
    requirements.add('tox-current-env >= 0.0.6', source='tox itself')
@ -335,7 +303,7 @@ def generate_tox_requirements(toxenv, requirements):
            r.check_returncode()
        deplines = deps.read().splitlines()
-        packages = parse_requirements_lines(deplines)
+        packages = convert_requirements_txt(deplines)
        requirements.add_extras(*extras.read().splitlines())
        requirements.extend(packages,
                            source=f'tox --print-deps-only: {toxenv}')
@ -372,11 +340,10 @@ def generate_requires(
            raise ValueError('-N option cannot be used in combination with -r, -e, -t, -x options')
        if requirement_files:
            for req_file in requirement_files:
-                lines = req_file.read().splitlines()
+                requirements.extend(
-                packages = parse_requirements_lines(lines, pathlib.Path(req_file.name))
+                    convert_requirements_txt(req_file, pathlib.Path(req_file.name)),
-                requirements.extend(packages,
+                    source=f'requirements file {req_file.name}'
-                                    source=f'requirements file {req_file.name}',
+                )
                                    allow_egg_pkgname=True)
            requirements.check(source='all requirement files')
        if use_build_system:
            backend = get_backend(requirements)
--- a/pyproject_buildrequires_testcases.yaml
+++ b/pyproject_buildrequires_testcases.yaml
@ -603,7 +603,7 @@ With pyproject.toml, requirements file and with -N option:
    python3dist(paramiko)
    python3dist(sqlalchemy)
    python3dist(spam)
-  stderr_contains: "WARNING: Simplifying 'git+https://github.com/monty/spam.git@master#egg=spam' to 'spam'."
+  stderr_contains: "WARNING: Simplifying 'spam@git+https://github.com/monty/spam.git@master#egg=spam' to 'spam'."
  result: 0
 With pyproject.toml, requirements file and without -N option:
@ -662,3 +662,57 @@ Value error if -N and -e arguments are present:
    - py3
  use_build_system: false
  except: ValueError
 Weird and complex requirements file:
  installed:
    setuptools: 50
    wheel: 1
  setup.py: |
    from setuptools import setup
    setup(
        name='test',
        version='0.1',
    )
  requirements.txt: |
    Normal_Req ~= 1.2.0
    good@git+https://github.com/monty/spam.git@master#egg=bad
    git+https://github.com/monty/spam.git@master#egg=ugly
    this-name-is-too-\
    long-for-this-file<\
    =30  # even names and operators can be split
    # this is not a multi-line comment \
    some-dep
    other-dep  # but this *is* a multi-line coment \
    so any garbage can be here
    dep-a # and this comment ends with the blank line below \

    dep-b
    -r requirements2.txt
    ${PACKAGE}${WANTED_VERSION}
  requirements2.txt: |
    dep-from-included-file
  requirement_files:
    - requirements.txt
  environ:
    PACKAGE: package
    WANTED_VERSION: -from-environ >= 1.2.3
  expected: |
    (python3dist(normal-req) >= 1.2 with python3dist(normal-req) < 1.3)
    python3dist(good)
    python3dist(ugly)
    python3dist(this-name-is-too-long-for-this-file) <= 30
    python3dist(some-dep)
    python3dist(other-dep)
    python3dist(dep-a)
    python3dist(dep-b)
    python3dist(dep-from-included-file)
    python3dist(package-from-environ) >= 1.2.3
  stderr_contains:
  - "WARNING: Simplifying 'good@git+https://github.com/monty/spam.git@master#egg=bad' to 'good'."
  # XXX: pyproject_requirements_txt adds a prefix that's not actually in the source;
  # but that's good enough:
  - "WARNING: Simplifying 'ugly@git+https://github.com/monty/spam.git@master#egg=ugly' to 'ugly'."
  result: 0
--- a/pyproject_requirements_txt.py
+++ b/pyproject_requirements_txt.py
@ -0,0 +1,103 @@
 """Best-effort parser for requirements.txt files"""
 import urllib.parse
 from pathlib import Path
 import sys
 import os
 import re
 # `#` starts a comment only at the start of a line or after whitespace;
 # the match covers that whitespace and everything up to the end of the line
 COMMENT_RE = re.compile(r'(^|\s+)#.*$')
 # Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
 URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')
 # A `${NAME}` reference, where NAME consists of uppercase ASCII letters,
 # digits and underscores; group `var` is the whole reference, `name` is NAME
 ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
 # The leading run of ASCII letters, digits, `-` and `_` of a string
 PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
 # The requirements.txt format evolved rather organically; expect weirdness.
 def convert_requirements_txt(lines, path:Path = None):
    """Convert lines of a requirements file to PEP 508-style requirement strs

    This does NOT handle all of requirements.txt features (only pip can do
    that), but tries its best.
    The resulting requirements might not actually be valid (either because
    they're wrong in the file, or because we missed a special case).

    lines is an iterable of the physical lines of the file (an open file
    works too).
    path is the path to the requirements.txt file, used for options like `-r`.

    Returns a list of requirement strings.  Files named by `-r` or
    `--requirement` lines are read and converted recursively, relative to
    the directory of `path` when it is given.
    Raises ValueError for any other line that starts with `-`.
    """
    requirements = []
    lines = combine_logical_lines(lines)
    lines = strip_comments(lines)
    lines = expand_env_vars(lines)
    if path:
        filename = path.name
    else:
        filename = '<requirements file>'
    for line in lines:
        if URL_START_RE.match(line):
            # Handle URLs with "egg=..." fragments
            # see https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
            parsed_url = urllib.parse.urlparse(line)
            parsed_fragment = urllib.parse.parse_qs(parsed_url.fragment)
            if 'egg' in parsed_fragment:
                # Prepend the package name to the URL.
                match = PKGNAME_RE.match(parsed_fragment['egg'][0])
                if match:
                    pkg_name = match[0]
                    requirements.append(f'{pkg_name}@{line}')
                    continue
            # If that didn't work, pass the line on;
            # the caller will deal with invalid requirements
            requirements.append(line)
        elif (included := _included_requirements_file(line)) is not None:
            recursed_path = Path(included)
            if path:
                recursed_path = path.parent / recursed_path
            with recursed_path.open() as f:
                requirements.extend(convert_requirements_txt(f, recursed_path))
        elif line.startswith('-'):
            raise ValueError(f'{filename}: unsupported requirements file option: {line}')
        else:
            requirements.append(line)
    return requirements
 def _included_requirements_file(line):
    """Return the file named by a `-r`/`--requirement` line, None otherwise"""
    long_option = '--requirement'
    if line.startswith(long_option):
        value = line[len(long_option):]
        # The long option is spelled `--requirement FILE` or
        # `--requirement=FILE`; anything else is a different option.
        if value.startswith('='):
            return value[1:].strip()
        if value[:1].isspace():
            return value.strip()
        return None
    if line.startswith('-r'):
        return line[2:].strip()
    return None
 def combine_logical_lines(lines):
    """Combine logical lines together (backslash line-continuation)

    lines is an iterable of physical lines, with or without the trailing
    newline.  Yields one string per logical line: a physical line ending
    with a backslash is joined (without the backslash) with the line that
    follows it.  Lines that are comments from their very start are skipped
    entirely, so they neither start nor end a continuation.
    """
    pieces = []
    for line in lines:
        line = line.rstrip('\n')
        # Whole-line comments *only* are removed before line-continuation
        if COMMENT_RE.match(line):
            continue
        if line.endswith('\\'):
            pieces.append(line[:-1])
        else:
            # trailing whitespace is only removed from full logical lines
            pieces.append(line.rstrip())
            yield ''.join(pieces)
            pieces = []
    # The input ended in the middle of a continuation: emit what was
    # collected.  Nothing is pending otherwise, so don't invent an empty line.
    if pieces:
        yield ''.join(pieces)
 def strip_comments(lines):
    """Yield lines with comments and surrounding whitespace removed

    Lines that end up empty are not yielded at all.
    """
    for logical_line in lines:
        # The text before the first comment is the first item of the split
        before_comment = COMMENT_RE.split(logical_line, maxsplit=1)[0]
        content = before_comment.strip()
        if not content:
            continue
        yield content
 def expand_env_vars(lines):
    """Yield lines with `${NAME}` references replaced from the environment

    Each reference matched by ENV_VAR_RE is replaced with the value of the
    environment variable of that name.  References to variables that are
    not set are left in the line unchanged.
    """
    def repl(match):
        value = os.getenv(match['name'])
        if value is None:
            # Unset variable: keep the `${NAME}` text as it was
            return match['var']
        return value
    for line in lines:
        yield ENV_VAR_RE.sub(repl, line)
--- a/test_pyproject_buildrequires.py
+++ b/test_pyproject_buildrequires.py
@ -1,6 +1,5 @@
 from pathlib import Path
 import importlib.metadata
 import io
 import pytest
 import yaml
@ -29,6 +28,9 @@ def test_data(case_name, capsys, tmp_path, monkeypatch):
        if filename.endswith(file_types):
            cwd.joinpath(filename).write_text(case[filename])
    for name, value in case.get('environ', {}).items():
        monkeypatch.setenv(name, value)
    def get_installed_version(dist_name):
        try:
            return str(case['installed'][dist_name])
@ -65,8 +67,14 @@ def test_data(case_name, capsys, tmp_path, monkeypatch):
        if 'expected' in case:
            assert out == case['expected']
-        if 'stderr_contains' in case:
+
-            assert case['stderr_contains'] in err
+        # stderr_contains may be a string or list of strings
        stderr_contains = case.get('stderr_contains')
        if stderr_contains is not None:
            if isinstance(stderr_contains, str):
                stderr_contains = [stderr_contains]
            for expected_substring in stderr_contains:
                assert expected_substring in err
    finally:
        for req in requirement_files:
            req.close()
--- a/test_pyproject_requirements_txt.py
+++ b/test_pyproject_requirements_txt.py
@ -0,0 +1,78 @@
 from pathlib import Path
 from textwrap import dedent
 from pyproject_requirements_txt import convert_requirements_txt
 def test_requirements_add_pkgname():
    """URL lines get the package name from their egg= fragment prepended"""
    source_text = dedent(r"""
        good@git+https://github.com/monty/spam.git@master#egg=bad
        git+https://github.com/monty/spam.git@master#egg=ugly
        https://example.com/undead.tar.gz#egg=undead ; python_version > 3.0
    """)
    converted = convert_requirements_txt(source_text.splitlines())
    # A line that already names its package is passed through untouched
    assert converted == [
        'good@git+https://github.com/monty/spam.git@master#egg=bad',
        'ugly@git+https://github.com/monty/spam.git@master#egg=ugly',
        'undead@https://example.com/undead.tar.gz#egg=undead ; python_version > 3.0',
    ]
 def test_requirements_preprocess(monkeypatch):
    """Check continuation, comment and env-var handling, also against pip"""
    # <END> marks trailing whitespace that editors would otherwise strip
    reqs_txt = dedent(r"""
        Normal_Req ~= 1.2.0
           whitespace-stripped < 3    <END>
        # indentation is preserved in continuations:
        foo <=\
            30
        bar<=   \
        30
        # names and operators can be split:
        this-was-\
        too-long<\
        =30  
        # this is not a multi-line comment \
        some-dep
             # neither is this \
        other-dep
        another-dep  # but this *is* a multi-line coment \
        so any garbage can be here
        dep-a # and this comment ends with the blank line below \

        dep-b
        ${ENVVAR}
        whitespace-stripped-before-substitution   ${SPACE}
        ${MISSING_ENVVAR}
    """.replace('<END>', ''))
    monkeypatch.setenv('ENVVAR', 'package-from-env')
    monkeypatch.setenv('SPACE', ' ')
    monkeypatch.delenv('MISSING_ENVVAR', raising=False)
    result = convert_requirements_txt(reqs_txt.splitlines())
    expected = [
        'Normal_Req ~= 1.2.0',
        'whitespace-stripped < 3',
        'foo <=    30',
        'bar<=   30',
        'this-was-too-long<=30',
        'some-dep',
        'other-dep',
        'another-dep',
        'dep-a',
        'dep-b',
        'package-from-env',
        'whitespace-stripped-before-substitution    ',
        '${MISSING_ENVVAR}',
    ]
    assert result == expected
    # This test uses pip internals, so it might break in the future.
    from pip._internal.req.req_file import preprocess
    expected = [line for lineno, line in preprocess(reqs_txt)]
    assert result == expected