e014f4f944
PR: https://github.com/asciidoc/asciidoc-py3/pull/5
Commits:
  6469317 Remove unnecessary decode in a2x (Matthew Peveler)
  684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
  8369a97 re-add --nonet option (Matthew Peveler)
These changes fix failures in building cgit's documentation (and hopefully other similar decoding failures).
41 lines
1.6 KiB
Diff
41 lines
1.6 KiB
Diff
Taken from upstream PR#5 (https://github.com/asciidoc/asciidoc-py3/pull/5)
|
|
|
|
6469317 Remove unnecessary decode in a2x (Matthew Peveler)
|
|
684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
|
|
8369a97 re-add --nonet option (Matthew Peveler)
|
|
|
|
diff --git c/a2x.py w/a2x.py
|
|
index 55eb57e..c015079 100755
|
|
--- c/a2x.py
|
|
+++ w/a2x.py
|
|
@@ -254,15 +254,11 @@ def find_resources(files, tagname, attrname, filter=None):
|
|
if OPTIONS.dry_run:
|
|
continue
|
|
parser = FindResources()
|
|
- # HTMLParser has problems with non-ASCII strings.
|
|
- # See http://bugs.python.org/issue3932
|
|
- contents = read_file(filename)
|
|
- mo = re.search(r'\A<\?xml.* encoding="(.*?)"', contents)
|
|
- if mo:
|
|
- encoding = mo.group(1)
|
|
- parser.feed(contents.decode(encoding))
|
|
- else:
|
|
- parser.feed(contents)
|
|
+ with open(filename, 'rb') as open_file:
|
|
+ contents = open_file.read()
|
|
+ mo = re.search(b'\A<\?xml.* encoding="(.*?)"', contents)
|
|
+ contents = contents.decode(mo.group(1).decode('utf-8') if mo else 'utf-8')
|
|
+ parser.feed(contents)
|
|
parser.close()
|
|
result = list(set(result)) # Drop duplicate values.
|
|
result.sort()
|
|
@@ -337,7 +333,7 @@ def get_source_options(asciidoc_file):
|
|
result = []
|
|
if os.path.isfile(asciidoc_file):
|
|
options = ''
|
|
- with open(asciidoc_file) as f:
|
|
+ with open(asciidoc_file, encoding='utf-8') as f:
|
|
for line in f:
|
|
mo = re.search(r'^//\s*a2x:', line)
|
|
if mo:
|