221 lines
8.0 KiB
Diff
221 lines
8.0 KiB
Diff
From 98a1eff200a60d81f404b2874db24a88ee2a592f Mon Sep 17 00:00:00 2001
|
|
From: Shlomi Fish <shlomif@shlomifish.org>
|
|
Date: Tue, 25 Jun 2019 12:23:39 +0300
|
|
Subject: [PATCH] Python 3 compatibility.
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
See https://github.com/rrthomas/recode/issues/15 .
|
|
|
|
Petr Pisar: Ported to 3.7.1 from py3-take2 branch of
|
|
<https://github.com/shlomif/recode>:
|
|
|
|
commit 04aefb26fa080c8e9d6ba7a136a8ae263727fba8
|
|
Author: Shlomi Fish <shlomif@shlomifish.org>
|
|
Date: Tue Jun 25 12:23:39 2019 +0300
|
|
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
tables.py | 59 +++++++++++++++++++++++++++++++++++--------------------
|
|
1 file changed, 38 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/tables.py b/tables.py
|
|
index 2604dfe..51752a6 100755
|
|
--- a/tables.py
|
|
+++ b/tables.py
|
|
@@ -43,6 +43,16 @@ When `-F' and `-n' are used, process Alain's tables.
|
|
|
|
import re, sys
|
|
|
|
+def to_unicode(s):
|
|
+ import six
|
|
+ if isinstance(s, six.text_type):
|
|
+ return s
|
|
+ try:
|
|
+ s = six.text_type(s, 'utf-8')
|
|
+ except UnicodeDecodeError as err:
|
|
+ s = six.text_type(s, 'utf-8', 'ignore')
|
|
+ return s
|
|
+
|
|
# Character constants.
|
|
REPLACEMENT_CHARACTER = 0xFFFD
|
|
NOT_A_CHARACTER = 0xFFFF
|
|
@@ -127,7 +137,7 @@ class Main:
|
|
self.mnemonics = Mnemonics()
|
|
self.mnemonics.digest_mnemonics_ds(input)
|
|
break
|
|
- if input.match('Network Working Group +K\. Simonsen$'):
|
|
+ if input.match('Network Working Group +K\\. Simonsen$'):
|
|
if (self.charnames
|
|
and self.charnames.do_sources
|
|
and not French_option):
|
|
@@ -201,12 +211,15 @@ class Charnames(Options):
|
|
|
|
def digest_french(self, input):
|
|
self.preset_french()
|
|
- fold_table = range(256)
|
|
- for before, after in map(
|
|
- None,
|
|
+ fold_table = list(range(256))
|
|
+ def myord(c):
|
|
+ if isinstance(c, int):
|
|
+ return c
|
|
+ return ord(c)
|
|
+ for before, after in zip(
|
|
u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'),
|
|
u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')):
|
|
- fold_table[ord(before)] = ord(after)
|
|
+ fold_table[myord(before)] = myord(after)
|
|
folding = ''.join(map(chr, fold_table))
|
|
ignorables = (
|
|
u'<commande>'.encode('ISO-8859-1'),
|
|
@@ -314,6 +327,8 @@ class Charnames(Options):
|
|
if len(text) > self.max_length:
|
|
self.max_length = len(text)
|
|
for word in text.split():
|
|
+ word = to_unicode(word)
|
|
+ assert isinstance(word, str)
|
|
self.code_map[word] = self.code_map.get(word, 0) + 1
|
|
|
|
def presort_word(self, word):
|
|
@@ -334,18 +349,18 @@ class Charnames(Options):
|
|
# the second cycling faster from 1 to 255.
|
|
if run.verbose:
|
|
sys.stdout.write(' sorting words...')
|
|
- pairs = map(self.presort_word, self.code_map.keys())
|
|
+ pairs = list(map(self.presort_word, self.code_map.keys()))
|
|
pairs.sort()
|
|
- words = map(lambda pair: pair[1], pairs)
|
|
+ words = list(map(lambda pair: pair[1], pairs))
|
|
pairs = None
|
|
if run.verbose:
|
|
sys.stdout.write(' %d of them\n' % len(words))
|
|
count = len(words)
|
|
- singles = (255 * 255 - count) / 254
|
|
+ singles = (255 * 255 - count) // 254
|
|
# Transmit a few values for further usage by the C code.
|
|
if run.verbose:
|
|
sys.stdout.write(' sorting names...')
|
|
- ucs2_table = self.charname_map.keys()
|
|
+ ucs2_table = list(self.charname_map.keys())
|
|
ucs2_table.sort()
|
|
if run.verbose:
|
|
sys.stdout.write(' %d of them\n' % len(ucs2_table))
|
|
@@ -366,12 +381,14 @@ class Charnames(Options):
|
|
word = words[counter]
|
|
write(' %-28s/* \\%0.3o */\n'
|
|
% ('"%s",' % re.sub('"', r'\"', word), char1))
|
|
+ assert isinstance(word, str)
|
|
self.code_map[words[counter]] = char1
|
|
char1 += 1
|
|
for counter in range(singles, count):
|
|
word = words[counter]
|
|
write(' %-28s/* \\%0.3o\\%0.3o */\n'
|
|
% ('"%s",' % re.sub('"', r'\"', word, 1), char1, char2))
|
|
+ assert isinstance(word, str)
|
|
self.code_map[words[counter]] = 256 * char1 + char2
|
|
if char2 == 255:
|
|
char1 += 1
|
|
@@ -397,7 +414,7 @@ class Charnames(Options):
|
|
if code < 256:
|
|
write('\\%0.3o' % code)
|
|
else:
|
|
- write('\\%0.3o\\%0.3o' % (code / 256, code % 256))
|
|
+ write('\\%0.3o\\%0.3o' % (code // 256, code % 256))
|
|
else:
|
|
sys.stdout.write('??? %s\n' % word)
|
|
write('"},\n')
|
|
@@ -540,7 +557,7 @@ class Mnemonics(Options):
|
|
continue
|
|
if len(line) == 3:
|
|
continue
|
|
- if input.begins(' \.\.\.'):
|
|
+ if input.begins(' \\.\\.\\.'):
|
|
continue
|
|
if line == ' Presentation forms\n':
|
|
continue
|
|
@@ -667,7 +684,7 @@ class Mnemonics(Options):
|
|
'static const struct entry table[TABLE_LENGTH] =\n'
|
|
' {\n')
|
|
count = 0
|
|
- indices = self.mnemonic_map.keys()
|
|
+ indices = list(self.mnemonic_map.keys())
|
|
indices.sort()
|
|
for ucs2 in indices:
|
|
text = self.mnemonic_map[ucs2]
|
|
@@ -681,7 +698,7 @@ class Mnemonics(Options):
|
|
'static const unsigned short inverse[TABLE_LENGTH] =\n'
|
|
' {')
|
|
count = 0
|
|
- keys = inverse_map.keys()
|
|
+ keys = list(inverse_map.keys())
|
|
keys.sort()
|
|
for text in keys:
|
|
if count % 10 == 0:
|
|
@@ -744,7 +761,7 @@ class Strips(Options):
|
|
def digest_rfc1345(self, input):
|
|
self.init_write_data()
|
|
# Informal canonical order of presentation.
|
|
- CHARSET, REM, ALIAS, ESC, BITS, CODE = range(6)
|
|
+ CHARSET, REM, ALIAS, ESC, BITS, CODE = list(range(6))
|
|
charset = None
|
|
skip = False
|
|
while True:
|
|
@@ -956,7 +973,7 @@ class Strips(Options):
|
|
if input.search('\032'):
|
|
# Old MS-DOS C-z !!
|
|
break
|
|
- match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t\#')
|
|
+ match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t#')
|
|
if match:
|
|
self.table[int(match.group(1), 16)] = int(match.group(2), 16)
|
|
else:
|
|
@@ -1125,7 +1142,7 @@ class Strips(Options):
|
|
write = Output('fr-%s' % self.TEXINFO, noheader=True).write
|
|
else:
|
|
write = Output(self.TEXINFO, noheader=True).write
|
|
- charsets = self.remark_map.keys()
|
|
+ charsets = list(self.remark_map.keys())
|
|
charsets.sort()
|
|
for charset in charsets:
|
|
write('\n'
|
|
@@ -1161,12 +1178,12 @@ class Input:
|
|
|
|
def __init__(self, name):
|
|
self.name = name
|
|
- self.input = file(name)
|
|
+ self.input = open(name, "rb")
|
|
self.line_count = 0
|
|
sys.stdout.write("Reading %s\n" % name)
|
|
|
|
def readline(self):
|
|
- self.line = self.input.readline()
|
|
+ self.line = to_unicode(self.input.readline())
|
|
self.line_count += 1
|
|
return self.line
|
|
|
|
@@ -1184,16 +1201,16 @@ class Input:
|
|
return self.line[:len(text)] == text
|
|
|
|
def match(self, pattern):
|
|
- return re.match(pattern, self.line)
|
|
+ return re.match(pattern, to_unicode(self.line))
|
|
|
|
def search(self, pattern):
|
|
- return re.search(pattern, self.line)
|
|
+ return re.search(pattern, to_unicode(self.line))
|
|
|
|
class Output:
|
|
|
|
def __init__(self, name, noheader=False):
|
|
self.name = name
|
|
- self.write = file(name, 'w').write
|
|
+ self.write = open(name, 'w').write
|
|
sys.stdout.write("Writing %s\n" % name)
|
|
if not noheader:
|
|
self.write("""\
|
|
--
|
|
2.21.0
|
|
|