recode/recode-3.7.1-python-3-compatibility.patch

221 lines
8.0 KiB
Diff
Raw Normal View History

2019-08-16 10:08:47 +00:00
From 98a1eff200a60d81f404b2874db24a88ee2a592f Mon Sep 17 00:00:00 2001
From: Shlomi Fish <shlomif@shlomifish.org>
Date: Tue, 25 Jun 2019 12:23:39 +0300
Subject: [PATCH] python 3 compatibility.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
See <https://github.com/rrthomas/recode/issues/15>.
Petr Písař: Ported to recode 3.7.1 from the py3-take2 branch of
<https://github.com/shlomif/recode>:
commit 04aefb26fa080c8e9d6ba7a136a8ae263727fba8
Author: Shlomi Fish <shlomif@shlomifish.org>
Date: Tue Jun 25 12:23:39 2019 +0300
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
tables.py | 59 +++++++++++++++++++++++++++++++++++--------------------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/tables.py b/tables.py
index 2604dfe..51752a6 100755
--- a/tables.py
+++ b/tables.py
@@ -43,6 +43,16 @@ When `-F' and `-n' are used, process Alain's tables.
import re, sys
+def to_unicode(s):
+ import six
+ if isinstance(s, six.text_type):
+ return s
+ try:
+ s = six.text_type(s, 'utf-8')
+ except UnicodeDecodeError as err:
+ s = six.text_type(s, 'utf-8', 'ignore')
+ return s
+
# Character constants.
REPLACEMENT_CHARACTER = 0xFFFD
NOT_A_CHARACTER = 0xFFFF
@@ -127,7 +137,7 @@ class Main:
self.mnemonics = Mnemonics()
self.mnemonics.digest_mnemonics_ds(input)
break
- if input.match('Network Working Group +K\. Simonsen$'):
+ if input.match('Network Working Group +K\\. Simonsen$'):
if (self.charnames
and self.charnames.do_sources
and not French_option):
@@ -201,12 +211,15 @@ class Charnames(Options):
def digest_french(self, input):
self.preset_french()
- fold_table = range(256)
- for before, after in map(
- None,
+ fold_table = list(range(256))
+ def myord(c):
+ if isinstance(c, int):
+ return c
+ return ord(c)
+ for before, after in zip(
u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'),
u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')):
- fold_table[ord(before)] = ord(after)
+ fold_table[myord(before)] = myord(after)
folding = ''.join(map(chr, fold_table))
ignorables = (
u'<commande>'.encode('ISO-8859-1'),
@@ -314,6 +327,8 @@ class Charnames(Options):
if len(text) > self.max_length:
self.max_length = len(text)
for word in text.split():
+ word = to_unicode(word)
+ assert isinstance(word, str)
self.code_map[word] = self.code_map.get(word, 0) + 1
def presort_word(self, word):
@@ -334,18 +349,18 @@ class Charnames(Options):
# the second cycling faster from 1 to 255.
if run.verbose:
sys.stdout.write(' sorting words...')
- pairs = map(self.presort_word, self.code_map.keys())
+ pairs = list(map(self.presort_word, self.code_map.keys()))
pairs.sort()
- words = map(lambda pair: pair[1], pairs)
+ words = list(map(lambda pair: pair[1], pairs))
pairs = None
if run.verbose:
sys.stdout.write(' %d of them\n' % len(words))
count = len(words)
- singles = (255 * 255 - count) / 254
+ singles = (255 * 255 - count) // 254
# Transmit a few values for further usage by the C code.
if run.verbose:
sys.stdout.write(' sorting names...')
- ucs2_table = self.charname_map.keys()
+ ucs2_table = list(self.charname_map.keys())
ucs2_table.sort()
if run.verbose:
sys.stdout.write(' %d of them\n' % len(ucs2_table))
@@ -366,12 +381,14 @@ class Charnames(Options):
word = words[counter]
write(' %-28s/* \\%0.3o */\n'
% ('"%s",' % re.sub('"', r'\"', word), char1))
+ assert isinstance(word, str)
self.code_map[words[counter]] = char1
char1 += 1
for counter in range(singles, count):
word = words[counter]
write(' %-28s/* \\%0.3o\\%0.3o */\n'
% ('"%s",' % re.sub('"', r'\"', word, 1), char1, char2))
+ assert isinstance(word, str)
self.code_map[words[counter]] = 256 * char1 + char2
if char2 == 255:
char1 += 1
@@ -397,7 +414,7 @@ class Charnames(Options):
if code < 256:
write('\\%0.3o' % code)
else:
- write('\\%0.3o\\%0.3o' % (code / 256, code % 256))
+ write('\\%0.3o\\%0.3o' % (code // 256, code % 256))
else:
sys.stdout.write('??? %s\n' % word)
write('"},\n')
@@ -540,7 +557,7 @@ class Mnemonics(Options):
continue
if len(line) == 3:
continue
- if input.begins(' \.\.\.'):
+ if input.begins(' \\.\\.\\.'):
continue
if line == ' Presentation forms\n':
continue
@@ -667,7 +684,7 @@ class Mnemonics(Options):
'static const struct entry table[TABLE_LENGTH] =\n'
' {\n')
count = 0
- indices = self.mnemonic_map.keys()
+ indices = list(self.mnemonic_map.keys())
indices.sort()
for ucs2 in indices:
text = self.mnemonic_map[ucs2]
@@ -681,7 +698,7 @@ class Mnemonics(Options):
'static const unsigned short inverse[TABLE_LENGTH] =\n'
' {')
count = 0
- keys = inverse_map.keys()
+ keys = list(inverse_map.keys())
keys.sort()
for text in keys:
if count % 10 == 0:
@@ -744,7 +761,7 @@ class Strips(Options):
def digest_rfc1345(self, input):
self.init_write_data()
# Informal canonical order of presentation.
- CHARSET, REM, ALIAS, ESC, BITS, CODE = range(6)
+ CHARSET, REM, ALIAS, ESC, BITS, CODE = list(range(6))
charset = None
skip = False
while True:
@@ -956,7 +973,7 @@ class Strips(Options):
if input.search('\032'):
# Old MS-DOS C-z !!
break
- match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t\#')
+ match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t#')
if match:
self.table[int(match.group(1), 16)] = int(match.group(2), 16)
else:
@@ -1125,7 +1142,7 @@ class Strips(Options):
write = Output('fr-%s' % self.TEXINFO, noheader=True).write
else:
write = Output(self.TEXINFO, noheader=True).write
- charsets = self.remark_map.keys()
+ charsets = list(self.remark_map.keys())
charsets.sort()
for charset in charsets:
write('\n'
@@ -1161,12 +1178,12 @@ class Input:
def __init__(self, name):
self.name = name
- self.input = file(name)
+ self.input = open(name, "rb")
self.line_count = 0
sys.stdout.write("Reading %s\n" % name)
def readline(self):
- self.line = self.input.readline()
+ self.line = to_unicode(self.input.readline())
self.line_count += 1
return self.line
@@ -1184,16 +1201,16 @@ class Input:
return self.line[:len(text)] == text
def match(self, pattern):
- return re.match(pattern, self.line)
+ return re.match(pattern, to_unicode(self.line))
def search(self, pattern):
- return re.search(pattern, self.line)
+ return re.search(pattern, to_unicode(self.line))
class Output:
def __init__(self, name, noheader=False):
self.name = name
- self.write = file(name, 'w').write
+ self.write = open(name, 'w').write
sys.stdout.write("Writing %s\n" % name)
if not noheader:
self.write("""\
--
2.21.0