From 894e21291ec56020d636402667948dc07b38f9c9 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 15 Sep 2020 08:55:44 +0200 Subject: [PATCH] Preprocess dist-info/RECORD file in %install and remove it. According to PEP 627, the RECORD file is optional and it doesn't make sense to keep it for system packages. Moreover, its absence should indicate to other tools like pip that they should not touch such packages. Now, we process the content of all RECORD files into one pyproject-record (JSON) file, which is then used in %pyproject_save_files. That way, we can remove the original files in %pyproject_install and keep their content for later. PEP 627: https://www.python.org/dev/peps/pep-0627/#optional-record-file --- macros.pyproject | 14 ++++ pyproject-rpm-macros.spec | 3 + pyproject_preprocess_record.py | 86 +++++++++++++++++++++++ pyproject_save_files.py | 120 +++++++++++---------------------- 4 files changed, 141 insertions(+), 82 deletions(-) create mode 100644 pyproject_preprocess_record.py diff --git a/macros.pyproject b/macros.pyproject index 6ca6a50..f77729d 100644 --- a/macros.pyproject +++ b/macros.pyproject @@ -12,6 +12,7 @@ %pyproject_files %{_builddir}/pyproject-files %pyproject_ghost_distinfo %{_builddir}/pyproject-ghost-distinfo +%pyproject_record %{_builddir}/pyproject-record %pyproject_wheel() %{expand:\\\ export TMPDIR="${PWD}/%{_pyproject_builddir}" @@ -34,12 +35,24 @@ if [ -d %{buildroot}%{python3_sitelib} ]; then for distinfo in %{buildroot}%{python3_sitelib}/*.dist-info; do echo "%ghost ${distinfo#%{buildroot}}" >> %{pyproject_ghost_distinfo} sed -i 's/pip/rpm/' ${distinfo}/INSTALLER + if [ -f ${distinfo}/RECORD ]; then + PYTHONPATH=%{_rpmconfigdir}/redhat \\ + %{__python3} -B %{_rpmconfigdir}/redhat/pyproject_preprocess_record.py \\ + --buildroot %{buildroot} --record ${distinfo}/RECORD --output %{pyproject_record} + rm ${distinfo}/RECORD + fi done fi if [ %{buildroot}%{python3_sitearch} != %{buildroot}%{python3_sitelib} ] && [ -d 
%{buildroot}%{python3_sitearch} ]; then for distinfo in %{buildroot}%{python3_sitearch}/*.dist-info; do echo "%ghost ${distinfo#%{buildroot}}" >> %{pyproject_ghost_distinfo} sed -i 's/pip/rpm/' ${distinfo}/INSTALLER + if [ -f ${distinfo}/RECORD ]; then + PYTHONPATH=%{_rpmconfigdir}/redhat \\ + %{__python3} -B %{_rpmconfigdir}/redhat/pyproject_preprocess_record.py \\ + --buildroot %{buildroot} --record ${distinfo}/RECORD --output %{pyproject_record} + rm ${distinfo}/RECORD + fi done fi lines=$(wc -l %{pyproject_ghost_distinfo} | cut -f1 -d" ") @@ -61,6 +74,7 @@ fi --sitelib "%{python3_sitelib}" \\ --sitearch "%{python3_sitearch}" \\ --python-version "%{python3_version}" \\ + --pyproject-record "%{pyproject_record}" \\ %{*} } diff --git a/pyproject-rpm-macros.spec b/pyproject-rpm-macros.spec index 77dd6a9..dc1093c 100644 --- a/pyproject-rpm-macros.spec +++ b/pyproject-rpm-macros.spec @@ -15,6 +15,7 @@ Source001: macros.pyproject Source101: pyproject_buildrequires.py Source102: pyproject_save_files.py Source103: pyproject_convert.py +Source104: pyproject_preprocess_record.py # Tests Source201: test_pyproject_buildrequires.py @@ -73,6 +74,7 @@ install -m 644 macros.pyproject %{buildroot}%{_rpmmacrodir}/ install -m 644 pyproject_buildrequires.py %{buildroot}%{_rpmconfigdir}/redhat/ install -m 644 pyproject_convert.py %{buildroot}%{_rpmconfigdir}/redhat/ install -m 644 pyproject_save_files.py %{buildroot}%{_rpmconfigdir}/redhat/ +install -m 644 pyproject_preprocess_record.py %{buildroot}%{_rpmconfigdir}/redhat/ %if %{with tests} %check @@ -86,6 +88,7 @@ export HOSTNAME="rpmbuild" # to speedup tox in network-less mock, see rhbz#1856 %{_rpmconfigdir}/redhat/pyproject_buildrequires.py %{_rpmconfigdir}/redhat/pyproject_convert.py %{_rpmconfigdir}/redhat/pyproject_save_files.py +%{_rpmconfigdir}/redhat/pyproject_preprocess_record.py %doc README.md %license LICENSE diff --git a/pyproject_preprocess_record.py b/pyproject_preprocess_record.py new file mode 100644 index 
0000000..10d233d --- /dev/null +++ b/pyproject_preprocess_record.py @@ -0,0 +1,86 @@ +import argparse +import csv +import json +import os +from pathlib import PosixPath + +from pyproject_save_files import BuildrootPath + + +def read_record(record_path): + """ + A generator yielding individual RECORD triplets. + + https://www.python.org/dev/peps/pep-0376/#record + + The triplet is str-path, hash, size -- the last two optional. + We will later care only for the paths anyway. + + Example: + + >>> g = read_record(PosixPath('./test_RECORD')) + >>> next(g) + ['../../../bin/__pycache__/tldr.cpython-....pyc', '', ''] + >>> next(g) + ['../../../bin/tldr', 'sha256=...', '12766'] + >>> next(g) + ['../../../bin/tldr.py', 'sha256=...', '12766'] + """ + with open(record_path, newline="", encoding="utf-8") as f: + yield from csv.reader( + f, delimiter=",", quotechar='"', lineterminator=os.linesep + ) + + +def parse_record(record_path, record_content): + """ + Returns a list with BuildrootPaths parsed from record_content + + params: + record_path: RECORD BuildrootPath + record_content: list of RECORD triplets + first item is a str-path relative to directory where dist-info directory is + (it can also be absolute according to the standard, but not from pip) + + Examples: + + >>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/requests-2.22.0.dist-info/RECORD'), + ... [('requests/sessions.py', 'sha256=xxx', '666'), ...])) + BuildrootPath('/usr/lib/python3.7/site-packages/requests/sessions.py') + + >>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/tldr-0.5.dist-info/RECORD'), + ... [('../../../bin/tldr', 'sha256=yyy', '777'), ...])) + BuildrootPath('/usr/bin/tldr') + """ + sitedir = record_path.parent.parent # trough the dist-info directory + # / with absolute right operand will remove the left operand + # any .. 
parts are resolved via normpath + return [str((sitedir / row[0]).normpath()) for row in record_content] + + +def save_parsed_record(record_path, parsed_record, output_file): + content = {} + if output_file.is_file(): + content = json.loads(output_file.read_text()) + content[str(record_path)] = parsed_record + output_file.write_text(json.dumps(content)) + + +def main(cli_args): + record_path = BuildrootPath.from_real(cli_args.record, root=cli_args.buildroot) + parsed_record = parse_record(record_path, read_record(cli_args.record)) + save_parsed_record(record_path, parsed_record, cli_args.output) + + +def argparser(): + parser = argparse.ArgumentParser() + r = parser.add_argument_group("required arguments") + r.add_argument("--buildroot", type=PosixPath, required=True) + r.add_argument("--record", type=PosixPath, required=True) + r.add_argument("--output", type=PosixPath, required=True) + return parser + + +if __name__ == "__main__": + cli_args = argparser().parse_args() + main(cli_args) diff --git a/pyproject_save_files.py b/pyproject_save_files.py index 2cc7b98..ad2911c 100644 --- a/pyproject_save_files.py +++ b/pyproject_save_files.py @@ -1,6 +1,6 @@ import argparse -import csv import fnmatch +import json import os from collections import defaultdict @@ -55,79 +55,6 @@ class BuildrootPath(PurePosixPath): return type(self)(os.path.normpath(self)) -def locate_record(root, sitedirs): - """ - Find a RECORD file in the given root. - sitedirs are BuildrootPaths. - Only RECORDs in dist-info dirs inside sitedirs are considered. - There can only be one RECORD file. - - Returns a PosixPath of the RECORD file. 
- """ - records = [] - for sitedir in sitedirs: - records.extend(sitedir.to_real(root).glob("*.dist-info/RECORD")) - - sitedirs_text = ", ".join(str(p) for p in sitedirs) - if len(records) == 0: - raise FileNotFoundError(f"There is no *.dist-info/RECORD in {sitedirs_text}") - if len(records) > 1: - raise FileExistsError(f"Multiple *.dist-info directories in {sitedirs_text}") - - return records[0] - - -def read_record(record_path): - """ - A generator yielding individual RECORD triplets. - - https://www.python.org/dev/peps/pep-0376/#record - - The triplet is str-path, hash, size -- the last two optional. - We will later care only for the paths anyway. - - Example: - - >>> g = read_record(PosixPath('./test_RECORD')) - >>> next(g) - ['../../../bin/__pycache__/tldr.cpython-....pyc', '', ''] - >>> next(g) - ['../../../bin/tldr', 'sha256=...', '12766'] - >>> next(g) - ['../../../bin/tldr.py', 'sha256=...', '12766'] - """ - with open(record_path, newline="", encoding="utf-8") as f: - yield from csv.reader( - f, delimiter=",", quotechar='"', lineterminator=os.linesep - ) - - -def parse_record(record_path, record_content): - """ - Returns a generator with BuildrootPaths parsed from record_content - - params: - record_path: RECORD BuildrootPath - record_content: list of RECORD triplets - first item is a str-path relative to directory where dist-info directory is - (it can also be absolute according to the standard, but not from pip) - - Examples: - - >>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/requests-2.22.0.dist-info/RECORD'), - ... [('requests/sessions.py', 'sha256=xxx', '666'), ...])) - BuildrootPath('/usr/lib/python3.7/site-packages/requests/sessions.py') - - >>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/tldr-0.5.dist-info/RECORD'), - ... 
[('../../../bin/tldr', 'sha256=yyy', '777'), ...])) - BuildrootPath('/usr/bin/tldr') - """ - sitedir = record_path.parent.parent # trough the dist-info directory - # / with absolute right operand will remove the left operand - # any .. parts are resolved via normpath - return ((sitedir / row[0]).normpath() for row in record_content) - - def pycached(script, python_version): """ For a script BuildrootPath, return a list with that path and its bytecode glob. @@ -218,6 +145,10 @@ def classify_paths( continue if path.parent == distinfo: + if path.name == "RECORD": + # RECORD files are removed manually in %pyproject_install + # See PEP 627 + continue # TODO is this a license/documentation? paths["metadata"]["files"].append(path) continue @@ -386,7 +317,24 @@ def parse_varargs(varargs): return globs, include_auto -def pyproject_save_files(buildroot, sitelib, sitearch, python_version, varargs): +def load_parsed_record(pyproject_record): + parsed_record = {} + with open(pyproject_record) as pyproject_record_file: + content = json.load(pyproject_record_file) + + if len(content) > 1: + raise FileExistsError("%pyproject install has found more than one *.dist-info/RECORD file. " + "Currently, %pyproject_save_files supports only one wheel → one file list mapping. 
" + "Feel free to open a bugzilla for pyproject-rpm-macros and describe your usecase.") + + # Redefine strings stored in JSON to BuildRootPaths + for record_path, files in content.items(): + parsed_record[BuildrootPath(record_path)] = [BuildrootPath(f) for f in files] + + return parsed_record + + +def pyproject_save_files(buildroot, sitelib, sitearch, python_version, pyproject_record, varargs): """ Takes arguments from the %{pyproject_save_files} macro @@ -397,14 +345,20 @@ def pyproject_save_files(buildroot, sitelib, sitearch, python_version, varargs): sitedirs = sorted({sitelib, sitearch}) globs, include_auto = parse_varargs(varargs) - record_path_real = locate_record(buildroot, sitedirs) - record_path = BuildrootPath.from_real(record_path_real, root=buildroot) - parsed_record = parse_record(record_path, read_record(record_path_real)) + parsed_records = load_parsed_record(pyproject_record) - paths_dict = classify_paths( - record_path, parsed_record, sitedirs, python_version - ) - return generate_file_list(paths_dict, globs, include_auto) + final_file_list = [] + + for record_path, files in parsed_records.items(): + paths_dict = classify_paths( + record_path, files, sitedirs, python_version + ) + + final_file_list.extend( + generate_file_list(paths_dict, globs, include_auto) + ) + + return final_file_list def main(cli_args): @@ -413,6 +367,7 @@ def main(cli_args): cli_args.sitelib, cli_args.sitearch, cli_args.python_version, + cli_args.pyproject_record, cli_args.varargs, ) @@ -427,6 +382,7 @@ def argparser(): r.add_argument("--sitelib", type=BuildrootPath, required=True) r.add_argument("--sitearch", type=BuildrootPath, required=True) r.add_argument("--python-version", type=str, required=True) + r.add_argument("--pyproject-record", type=PosixPath, required=True) parser.add_argument("varargs", nargs="+") return parser