Preprocess dist-info/RECORD file in %install and remove it.

According to PEP 627, the RECORD file is optional and
doesn't make sense to keep it for system packages. Moreover,
its absence should indicate to other tools like pip that
they should not touch such packages.

Now, we process content of all RECORD files to one
pyproject-record (JSON) which is then used in
%pyproject_save_files. That way, we can remove the original
files in %pyproject_install and keep their content for
later.

PEP 627: https://www.python.org/dev/peps/pep-0627/#optional-record-file
This commit is contained in:
Lumir Balhar 2020-09-15 08:55:44 +02:00 committed by churchyard
parent b1f03d7f51
commit 894e21291e
4 changed files with 141 additions and 82 deletions

View File

@ -12,6 +12,7 @@
%pyproject_files %{_builddir}/pyproject-files
%pyproject_ghost_distinfo %{_builddir}/pyproject-ghost-distinfo
%pyproject_record %{_builddir}/pyproject-record
%pyproject_wheel() %{expand:\\\
export TMPDIR="${PWD}/%{_pyproject_builddir}"
@ -34,12 +35,24 @@ if [ -d %{buildroot}%{python3_sitelib} ]; then
for distinfo in %{buildroot}%{python3_sitelib}/*.dist-info; do
echo "%ghost ${distinfo#%{buildroot}}" >> %{pyproject_ghost_distinfo}
sed -i 's/pip/rpm/' ${distinfo}/INSTALLER
if [ -f ${distinfo}/RECORD ]; then
PYTHONPATH=%{_rpmconfigdir}/redhat \\
%{__python3} -B %{_rpmconfigdir}/redhat/pyproject_preprocess_record.py \\
--buildroot %{buildroot} --record ${distinfo}/RECORD --output %{pyproject_record}
rm ${distinfo}/RECORD
fi
done
fi
if [ %{buildroot}%{python3_sitearch} != %{buildroot}%{python3_sitelib} ] && [ -d %{buildroot}%{python3_sitearch} ]; then
for distinfo in %{buildroot}%{python3_sitearch}/*.dist-info; do
echo "%ghost ${distinfo#%{buildroot}}" >> %{pyproject_ghost_distinfo}
sed -i 's/pip/rpm/' ${distinfo}/INSTALLER
if [ -f ${distinfo}/RECORD ]; then
PYTHONPATH=%{_rpmconfigdir}/redhat \\
%{__python3} -B %{_rpmconfigdir}/redhat/pyproject_preprocess_record.py \\
--buildroot %{buildroot} --record ${distinfo}/RECORD --output %{pyproject_record}
rm ${distinfo}/RECORD
fi
done
fi
lines=$(wc -l %{pyproject_ghost_distinfo} | cut -f1 -d" ")
@ -61,6 +74,7 @@ fi
--sitelib "%{python3_sitelib}" \\
--sitearch "%{python3_sitearch}" \\
--python-version "%{python3_version}" \\
--pyproject-record "%{pyproject_record}" \\
%{*}
}

View File

@ -15,6 +15,7 @@ Source001: macros.pyproject
Source101: pyproject_buildrequires.py
Source102: pyproject_save_files.py
Source103: pyproject_convert.py
Source104: pyproject_preprocess_record.py
# Tests
Source201: test_pyproject_buildrequires.py
@ -73,6 +74,7 @@ install -m 644 macros.pyproject %{buildroot}%{_rpmmacrodir}/
install -m 644 pyproject_buildrequires.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_convert.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_save_files.py %{buildroot}%{_rpmconfigdir}/redhat/
install -m 644 pyproject_preprocess_record.py %{buildroot}%{_rpmconfigdir}/redhat/
%if %{with tests}
%check
@ -86,6 +88,7 @@ export HOSTNAME="rpmbuild" # to speedup tox in network-less mock, see rhbz#1856
%{_rpmconfigdir}/redhat/pyproject_buildrequires.py
%{_rpmconfigdir}/redhat/pyproject_convert.py
%{_rpmconfigdir}/redhat/pyproject_save_files.py
%{_rpmconfigdir}/redhat/pyproject_preprocess_record.py
%doc README.md
%license LICENSE

View File

@ -0,0 +1,86 @@
import argparse
import csv
import json
import os
from pathlib import PosixPath
from pyproject_save_files import BuildrootPath
def read_record(record_path):
"""
A generator yielding individual RECORD triplets.
https://www.python.org/dev/peps/pep-0376/#record
The triplet is str-path, hash, size -- the last two optional.
We will later care only for the paths anyway.
Example:
>>> g = read_record(PosixPath('./test_RECORD'))
>>> next(g)
['../../../bin/__pycache__/tldr.cpython-....pyc', '', '']
>>> next(g)
['../../../bin/tldr', 'sha256=...', '12766']
>>> next(g)
['../../../bin/tldr.py', 'sha256=...', '12766']
"""
with open(record_path, newline="", encoding="utf-8") as f:
yield from csv.reader(
f, delimiter=",", quotechar='"', lineterminator=os.linesep
)
def parse_record(record_path, record_content):
"""
Returns a list with BuildrootPaths parsed from record_content
params:
record_path: RECORD BuildrootPath
record_content: list of RECORD triplets
first item is a str-path relative to directory where dist-info directory is
(it can also be absolute according to the standard, but not from pip)
Examples:
>>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/requests-2.22.0.dist-info/RECORD'),
... [('requests/sessions.py', 'sha256=xxx', '666'), ...]))
BuildrootPath('/usr/lib/python3.7/site-packages/requests/sessions.py')
>>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/tldr-0.5.dist-info/RECORD'),
... [('../../../bin/tldr', 'sha256=yyy', '777'), ...]))
BuildrootPath('/usr/bin/tldr')
"""
sitedir = record_path.parent.parent # trough the dist-info directory
# / with absolute right operand will remove the left operand
# any .. parts are resolved via normpath
return [str((sitedir / row[0]).normpath()) for row in record_content]
def save_parsed_record(record_path, parsed_record, output_file):
content = {}
if output_file.is_file():
content = json.loads(output_file.read_text())
content[str(record_path)] = parsed_record
output_file.write_text(json.dumps(content))
def main(cli_args):
record_path = BuildrootPath.from_real(cli_args.record, root=cli_args.buildroot)
parsed_record = parse_record(record_path, read_record(cli_args.record))
save_parsed_record(record_path, parsed_record, cli_args.output)
def argparser():
parser = argparse.ArgumentParser()
r = parser.add_argument_group("required arguments")
r.add_argument("--buildroot", type=PosixPath, required=True)
r.add_argument("--record", type=PosixPath, required=True)
r.add_argument("--output", type=PosixPath, required=True)
return parser
if __name__ == "__main__":
cli_args = argparser().parse_args()
main(cli_args)

View File

@ -1,6 +1,6 @@
import argparse
import csv
import fnmatch
import json
import os
from collections import defaultdict
@ -55,79 +55,6 @@ class BuildrootPath(PurePosixPath):
return type(self)(os.path.normpath(self))
def locate_record(root, sitedirs):
"""
Find a RECORD file in the given root.
sitedirs are BuildrootPaths.
Only RECORDs in dist-info dirs inside sitedirs are considered.
There can only be one RECORD file.
Returns a PosixPath of the RECORD file.
"""
records = []
for sitedir in sitedirs:
records.extend(sitedir.to_real(root).glob("*.dist-info/RECORD"))
sitedirs_text = ", ".join(str(p) for p in sitedirs)
if len(records) == 0:
raise FileNotFoundError(f"There is no *.dist-info/RECORD in {sitedirs_text}")
if len(records) > 1:
raise FileExistsError(f"Multiple *.dist-info directories in {sitedirs_text}")
return records[0]
def read_record(record_path):
"""
A generator yielding individual RECORD triplets.
https://www.python.org/dev/peps/pep-0376/#record
The triplet is str-path, hash, size -- the last two optional.
We will later care only for the paths anyway.
Example:
>>> g = read_record(PosixPath('./test_RECORD'))
>>> next(g)
['../../../bin/__pycache__/tldr.cpython-....pyc', '', '']
>>> next(g)
['../../../bin/tldr', 'sha256=...', '12766']
>>> next(g)
['../../../bin/tldr.py', 'sha256=...', '12766']
"""
with open(record_path, newline="", encoding="utf-8") as f:
yield from csv.reader(
f, delimiter=",", quotechar='"', lineterminator=os.linesep
)
def parse_record(record_path, record_content):
"""
Returns a generator with BuildrootPaths parsed from record_content
params:
record_path: RECORD BuildrootPath
record_content: list of RECORD triplets
first item is a str-path relative to directory where dist-info directory is
(it can also be absolute according to the standard, but not from pip)
Examples:
>>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/requests-2.22.0.dist-info/RECORD'),
... [('requests/sessions.py', 'sha256=xxx', '666'), ...]))
BuildrootPath('/usr/lib/python3.7/site-packages/requests/sessions.py')
>>> next(parse_record(BuildrootPath('/usr/lib/python3.7/site-packages/tldr-0.5.dist-info/RECORD'),
... [('../../../bin/tldr', 'sha256=yyy', '777'), ...]))
BuildrootPath('/usr/bin/tldr')
"""
sitedir = record_path.parent.parent # trough the dist-info directory
# / with absolute right operand will remove the left operand
# any .. parts are resolved via normpath
return ((sitedir / row[0]).normpath() for row in record_content)
def pycached(script, python_version):
"""
For a script BuildrootPath, return a list with that path and its bytecode glob.
@ -218,6 +145,10 @@ def classify_paths(
continue
if path.parent == distinfo:
if path.name == "RECORD":
# RECORD files are removed manually in %pyproject_install
# See PEP 627
continue
# TODO is this a license/documentation?
paths["metadata"]["files"].append(path)
continue
@ -386,7 +317,24 @@ def parse_varargs(varargs):
return globs, include_auto
def pyproject_save_files(buildroot, sitelib, sitearch, python_version, varargs):
def load_parsed_record(pyproject_record):
parsed_record = {}
with open(pyproject_record) as pyproject_record_file:
content = json.load(pyproject_record_file)
if len(content) > 1:
raise FileExistsError("%pyproject install has found more than one *.dist-info/RECORD file. "
"Currently, %pyproject_save_files supports only one wheel → one file list mapping. "
"Feel free to open a bugzilla for pyproject-rpm-macros and describe your usecase.")
# Redefine strings stored in JSON to BuildRootPaths
for record_path, files in content.items():
parsed_record[BuildrootPath(record_path)] = [BuildrootPath(f) for f in files]
return parsed_record
def pyproject_save_files(buildroot, sitelib, sitearch, python_version, pyproject_record, varargs):
"""
Takes arguments from the %{pyproject_save_files} macro
@ -397,14 +345,20 @@ def pyproject_save_files(buildroot, sitelib, sitearch, python_version, varargs):
sitedirs = sorted({sitelib, sitearch})
globs, include_auto = parse_varargs(varargs)
record_path_real = locate_record(buildroot, sitedirs)
record_path = BuildrootPath.from_real(record_path_real, root=buildroot)
parsed_record = parse_record(record_path, read_record(record_path_real))
parsed_records = load_parsed_record(pyproject_record)
paths_dict = classify_paths(
record_path, parsed_record, sitedirs, python_version
)
return generate_file_list(paths_dict, globs, include_auto)
final_file_list = []
for record_path, files in parsed_records.items():
paths_dict = classify_paths(
record_path, files, sitedirs, python_version
)
final_file_list.extend(
generate_file_list(paths_dict, globs, include_auto)
)
return final_file_list
def main(cli_args):
@ -413,6 +367,7 @@ def main(cli_args):
cli_args.sitelib,
cli_args.sitearch,
cli_args.python_version,
cli_args.pyproject_record,
cli_args.varargs,
)
@ -427,6 +382,7 @@ def argparser():
r.add_argument("--sitelib", type=BuildrootPath, required=True)
r.add_argument("--sitearch", type=BuildrootPath, required=True)
r.add_argument("--python-version", type=str, required=True)
r.add_argument("--pyproject-record", type=PosixPath, required=True)
parser.add_argument("varargs", nargs="+")
return parser