Add some a2x decoding fixes from upstream PR#5

PR: https://github.com/asciidoc/asciidoc-py3/pull/5

Commits:
6469317 Remove unnecessary decode in a2x (Matthew Peveler)
684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
8369a97 re-add --nonet option (Matthew Peveler)

These changes fix failures in building cgit's documentation (and
hopefully other similar decoding failures).
This commit is contained in:
Todd Zullinger 2018-06-16 00:52:17 -04:00
parent 142d8b499e
commit e014f4f944
2 changed files with 53 additions and 1 deletion

View File

@@ -0,0 +1,40 @@
Taken from upstream PR#5 (https://github.com/asciidoc/asciidoc-py3/pull/5)
6469317 Remove unnecessary decode in a2x (Matthew Peveler)
684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
8369a97 re-add --nonet option (Matthew Peveler)
diff --git c/a2x.py w/a2x.py
index 55eb57e..c015079 100755
--- c/a2x.py
+++ w/a2x.py
@@ -254,15 +254,11 @@ def find_resources(files, tagname, attrname, filter=None):
if OPTIONS.dry_run:
continue
parser = FindResources()
- # HTMLParser has problems with non-ASCII strings.
- # See http://bugs.python.org/issue3932
- contents = read_file(filename)
- mo = re.search(r'\A<\?xml.* encoding="(.*?)"', contents)
- if mo:
- encoding = mo.group(1)
- parser.feed(contents.decode(encoding))
- else:
- parser.feed(contents)
+ with open(filename, 'rb') as open_file:
+ contents = open_file.read()
+ mo = re.search(b'\A<\?xml.* encoding="(.*?)"', contents)
+ contents = contents.decode(mo.group(1).decode('utf-8') if mo else 'utf-8')
+ parser.feed(contents)
parser.close()
result = list(set(result)) # Drop duplicate values.
result.sort()
@@ -337,7 +333,7 @@ def get_source_options(asciidoc_file):
result = []
if os.path.isfile(asciidoc_file):
options = ''
- with open(asciidoc_file) as f:
+ with open(asciidoc_file, encoding='utf-8') as f:
for line in f:
mo = re.search(r'^//\s*a2x:', line)
if mo:

View File

@@ -5,7 +5,7 @@
Summary: Text based document generation
Name: asciidoc
Version: 8.6.10
-Release: 0.4.20180605git%{shortcommit}%{?dist}
+Release: 0.5.20180605git%{shortcommit}%{?dist}
# The python code does not specify a version.
# The javascript example code is GPLv2+.
License: GPL+ and GPLv2+
@@ -15,6 +15,14 @@ Source0: https://github.com/%{name}/%{name}-py3/archive/%{commit}/%{name}-py3-%{
Patch1: asciidoc-python3.patch
# https://github.com/asciidoc/asciidoc-py3/pull/5
# https://github.com/asciidoc/asciidoc-py3/issues/16
# Commits:
# 6469317 Remove unnecessary decode in a2x
# 684913e Fix decoding of file that specifies encoding in header tag in a2x
# 8369a97 re-add --nonet option
Patch2: asciidoc-python3-a2x-decode-fix.patch
BuildRequires: python3-devel
BuildRequires: dblatex
BuildRequires: docbook-style-xsl
@@ -174,6 +182,10 @@ cd tests
%{_sysconfdir}/asciidoc/filters/music/*.py
%changelog
* Sat Jun 16 2018 Todd Zullinger <tmz@pobox.com> - 8.6.10-0.5.20180605git986f99d
- Add some a2x decoding fixes from upstream PR#5
(https://github.com/asciidoc/asciidoc-py3/pull/5)
* Fri Jun 15 2018 Todd Zullinger <tmz@pobox.com> - 8.6.10-0.4.20180605git986f99d
- Restore BUGS.txt and CHANGELOG.txt doc files