asciidoc/asciidoc-python3-a2x-decode...

41 lines
1.6 KiB
Diff

Taken from upstream PR #5 (https://github.com/asciidoc/asciidoc-py3/pull/5):
6469317 Remove unnecessary decode in a2x (Matthew Peveler)
684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
8369a97 re-add --nonet option (Matthew Peveler)
diff --git c/a2x.py w/a2x.py
index 55eb57e..c015079 100755
--- c/a2x.py
+++ w/a2x.py
@@ -254,15 +254,11 @@ def find_resources(files, tagname, attrname, filter=None):
if OPTIONS.dry_run:
continue
parser = FindResources()
- # HTMLParser has problems with non-ASCII strings.
- # See http://bugs.python.org/issue3932
- contents = read_file(filename)
- mo = re.search(r'\A<\?xml.* encoding="(.*?)"', contents)
- if mo:
- encoding = mo.group(1)
- parser.feed(contents.decode(encoding))
- else:
- parser.feed(contents)
+ with open(filename, 'rb') as open_file:
+ contents = open_file.read()
+ mo = re.search(b'\A<\?xml.* encoding="(.*?)"', contents)
+ contents = contents.decode(mo.group(1).decode('utf-8') if mo else 'utf-8')
+ parser.feed(contents)
parser.close()
result = list(set(result)) # Drop duplicate values.
result.sort()
@@ -337,7 +333,7 @@ def get_source_options(asciidoc_file):
result = []
if os.path.isfile(asciidoc_file):
options = ''
- with open(asciidoc_file) as f:
+ with open(asciidoc_file, encoding='utf-8') as f:
for line in f:
mo = re.search(r'^//\s*a2x:', line)
if mo: