recode/recode-3.7.1-python-3-compatibility.patch

From 98a1eff200a60d81f404b2874db24a88ee2a592f Mon Sep 17 00:00:00 2001
From: Shlomi Fish <shlomif@shlomifish.org>
Date: Tue, 25 Jun 2019 12:23:39 +0300
Subject: [PATCH] python 3 compatibility.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/rrthomas/recode/issues/15 .

Petr Pisar: Ported to 3.7.1 from py3-take2 branch of
<https://github.com/shlomif/recode>:

    commit 04aefb26fa080c8e9d6ba7a136a8ae263727fba8
    Author: Shlomi Fish <shlomif@shlomifish.org>
    Date:   Tue Jun 25 12:23:39 2019 +0300

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 tables.py | 59 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/tables.py b/tables.py
index 2604dfe..51752a6 100755
--- a/tables.py
+++ b/tables.py
@@ -43,6 +43,16 @@ When `-F' and `-n' are used, process Alain's tables.
 
 import re, sys
 
+def to_unicode(s):
+    import six
+    if isinstance(s, six.text_type):
+        return s
+    try:
+        s = six.text_type(s, 'utf-8')
+    except UnicodeDecodeError as err:
+        s = six.text_type(s, 'utf-8', 'ignore')
+    return s
+
 # Character constants.
 REPLACEMENT_CHARACTER = 0xFFFD
 NOT_A_CHARACTER = 0xFFFF
@@ -127,7 +137,7 @@ class Main:
                         self.mnemonics = Mnemonics()
                     self.mnemonics.digest_mnemonics_ds(input)
                     break
-                if input.match('Network Working Group +K\. Simonsen$'):
+                if input.match('Network Working Group +K\\. Simonsen$'):
                     if (self.charnames
                             and self.charnames.do_sources
                             and not French_option):
@@ -201,12 +211,15 @@ class Charnames(Options):
 
     def digest_french(self, input):
         self.preset_french()
-        fold_table = range(256)
-        for before, after in map(
-                None,
+        fold_table = list(range(256))
+        def myord(c):
+            if isinstance(c, int):
+                return c
+            return ord(c)
+        for before, after in zip(
                 u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'),
                 u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')):
-            fold_table[ord(before)] = ord(after)
+            fold_table[myord(before)] = myord(after)
         folding = ''.join(map(chr, fold_table))
         ignorables = (
                 u'<commande>'.encode('ISO-8859-1'),
@@ -314,6 +327,8 @@ class Charnames(Options):
         if len(text) > self.max_length:
             self.max_length = len(text)
         for word in text.split():
+            word = to_unicode(word)
+            assert isinstance(word, str)
             self.code_map[word] = self.code_map.get(word, 0) + 1
 
     def presort_word(self, word):
@@ -334,18 +349,18 @@ class Charnames(Options):
         # the second cycling faster from 1 to 255.
         if run.verbose:
             sys.stdout.write('  sorting words...')
-        pairs = map(self.presort_word, self.code_map.keys())
+        pairs = list(map(self.presort_word, self.code_map.keys()))
         pairs.sort()
-        words = map(lambda pair: pair[1], pairs)
+        words = list(map(lambda pair: pair[1], pairs))
         pairs = None
         if run.verbose:
             sys.stdout.write(' %d of them\n' % len(words))
         count = len(words)
-        singles = (255 * 255 - count) / 254
+        singles = (255 * 255 - count) // 254
         # Transmit a few values for further usage by the C code.
         if run.verbose:
             sys.stdout.write('  sorting names...')
-        ucs2_table = self.charname_map.keys()
+        ucs2_table = list(self.charname_map.keys())
         ucs2_table.sort()
         if run.verbose:
             sys.stdout.write(' %d of them\n' % len(ucs2_table))
@@ -366,12 +381,14 @@ class Charnames(Options):
             word = words[counter]
             write('    %-28s/* \\%0.3o */\n'
                   % ('"%s",' % re.sub('"', r'\"', word), char1))
+            assert isinstance(word, str)
             self.code_map[words[counter]] = char1
             char1 += 1
         for counter in range(singles, count):
             word = words[counter]
             write('    %-28s/* \\%0.3o\\%0.3o */\n'
                   % ('"%s",' % re.sub('"', r'\"', word, 1), char1, char2))
+            assert isinstance(word, str)
             self.code_map[words[counter]] = 256 * char1 + char2
             if char2 == 255:
                 char1 += 1
@@ -397,7 +414,7 @@ class Charnames(Options):
                     if code < 256:
                         write('\\%0.3o' % code)
                     else:
-                        write('\\%0.3o\\%0.3o' % (code / 256, code % 256))
+                        write('\\%0.3o\\%0.3o' % (code // 256, code % 256))
                 else:
                     sys.stdout.write('??? %s\n' % word)
             write('"},\n')
@@ -540,7 +557,7 @@ class Mnemonics(Options):
                 continue
             if len(line) == 3:
                 continue
-            if input.begins('   \.\.\.'):
+            if input.begins('   \\.\\.\\.'):
                 continue
             if line == '   Presentation forms\n':
                 continue
@@ -667,7 +684,7 @@ class Mnemonics(Options):
               'static const struct entry table[TABLE_LENGTH] =\n'
               '  {\n')
         count = 0
-        indices = self.mnemonic_map.keys()
+        indices = list(self.mnemonic_map.keys())
         indices.sort()
         for ucs2 in indices:
             text = self.mnemonic_map[ucs2]
@@ -681,7 +698,7 @@ class Mnemonics(Options):
               'static const unsigned short inverse[TABLE_LENGTH] =\n'
               '  {')
         count = 0
-        keys = inverse_map.keys()
+        keys = list(inverse_map.keys())
         keys.sort()
         for text in keys:
             if count % 10 == 0:
@@ -744,7 +761,7 @@ class Strips(Options):
     def digest_rfc1345(self, input):
         self.init_write_data()
         # Informal canonical order of presentation.
-        CHARSET, REM, ALIAS, ESC, BITS, CODE = range(6)
+        CHARSET, REM, ALIAS, ESC, BITS, CODE = list(range(6))
         charset = None
         skip = False
         while True:
@@ -956,7 +973,7 @@ class Strips(Options):
             if input.search('\032'):
                 # Old MS-DOS C-z !!
                 break
-            match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t\#')
+            match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t#')
             if match:
                 self.table[int(match.group(1), 16)] = int(match.group(2), 16)
             else:
@@ -1125,7 +1142,7 @@ class Strips(Options):
             write = Output('fr-%s' % self.TEXINFO, noheader=True).write
         else:
             write = Output(self.TEXINFO, noheader=True).write
-        charsets = self.remark_map.keys()
+        charsets = list(self.remark_map.keys())
         charsets.sort()
         for charset in charsets:
             write('\n'
@@ -1161,12 +1178,12 @@ class Input:
 
     def __init__(self, name):
         self.name = name
-        self.input = file(name)
+        self.input = open(name, "rb")
         self.line_count = 0
         sys.stdout.write("Reading %s\n" % name)
 
     def readline(self):
-        self.line = self.input.readline()
+        self.line = to_unicode(self.input.readline())
         self.line_count += 1
         return self.line
 
@@ -1184,16 +1201,16 @@ class Input:
         return self.line[:len(text)] == text
 
     def match(self, pattern):
-        return re.match(pattern, self.line)
+        return re.match(pattern, to_unicode(self.line))
 
     def search(self, pattern):
-        return re.search(pattern, self.line)
+        return re.search(pattern, to_unicode(self.line))
 
 class Output:
 
     def __init__(self, name, noheader=False):
         self.name = name
-        self.write = file(name, 'w').write
+        self.write = open(name, 'w').write
         sys.stdout.write("Writing %s\n" % name)
         if not noheader:
             self.write("""\
-- 
2.21.0
3.7.1 bump 2019-08-16 10:08:47 +00:00			`From 98a1eff200a60d81f404b2874db24a88ee2a592f Mon Sep 17 00:00:00 2001`
			`From: Shlomi Fish <shlomif@shlomifish.org>`
			`Date: Tue, 25 Jun 2019 12:23:39 +0300`
			`Subject: [PATCH] python 3 compatibility.`
			`MIME-Version: 1.0`
			`Content-Type: text/plain; charset=UTF-8`
			`Content-Transfer-Encoding: 8bit`

			`See https://github.com/rrthomas/recode/issues/15 .`

			`Petr Pisar: Ported to 3.7.1 from py3-take2 branch of`
			`<https://github.com/shlomif/recode>:`

			`commit 04aefb26fa080c8e9d6ba7a136a8ae263727fba8`
			`Author: Shlomi Fish <shlomif@shlomifish.org>`
			`Date: Tue Jun 25 12:23:39 2019 +0300`

			`Signed-off-by: Petr Písař <ppisar@redhat.com>`
			`---`
			`tables.py \| 59 +++++++++++++++++++++++++++++++++++--------------------`
			`1 file changed, 38 insertions(+), 21 deletions(-)`

			`diff --git a/tables.py b/tables.py`
			`index 2604dfe..51752a6 100755`
			`--- a/tables.py`
			`+++ b/tables.py`
			@@ -43,6 +43,16 @@ When `-F' and `-n' are used, process Alain's tables.

			`import re, sys`

			`+def to_unicode(s):`
			`+ import six`
			`+ if isinstance(s, six.text_type):`
			`+ return s`
			`+ try:`
			`+ s = six.text_type(s, 'utf-8')`
			`+ except UnicodeDecodeError as err:`
			`+ s = six.text_type(s, 'utf-8', 'ignore')`
			`+ return s`
			`+`
			`# Character constants.`
			`REPLACEMENT_CHARACTER = 0xFFFD`
			`NOT_A_CHARACTER = 0xFFFF`
			`@@ -127,7 +137,7 @@ class Main:`
			`self.mnemonics = Mnemonics()`
			`self.mnemonics.digest_mnemonics_ds(input)`
			`break`
			`- if input.match('Network Working Group +K\. Simonsen$'):`
			`+ if input.match('Network Working Group +K\\. Simonsen$'):`
			`if (self.charnames`
			`and self.charnames.do_sources`
			`and not French_option):`
			`@@ -201,12 +211,15 @@ class Charnames(Options):`

			`def digest_french(self, input):`
			`self.preset_french()`
			`- fold_table = range(256)`
			`- for before, after in map(`
			`- None,`
			`+ fold_table = list(range(256))`
			`+ def myord(c):`
			`+ if isinstance(c, int):`
			`+ return c`
			`+ return ord(c)`
			`+ for before, after in zip(`
			`u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'),`
			`u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')):`
			`- fold_table[ord(before)] = ord(after)`
			`+ fold_table[myord(before)] = myord(after)`
			`folding = ''.join(map(chr, fold_table))`
			`ignorables = (`
			`u'<commande>'.encode('ISO-8859-1'),`
			`@@ -314,6 +327,8 @@ class Charnames(Options):`
			`if len(text) > self.max_length:`
			`self.max_length = len(text)`
			`for word in text.split():`
			`+ word = to_unicode(word)`
			`+ assert isinstance(word, str)`
			`self.code_map[word] = self.code_map.get(word, 0) + 1`

			`def presort_word(self, word):`
			`@@ -334,18 +349,18 @@ class Charnames(Options):`
			`# the second cycling faster from 1 to 255.`
			`if run.verbose:`
			`sys.stdout.write(' sorting words...')`
			`- pairs = map(self.presort_word, self.code_map.keys())`
			`+ pairs = list(map(self.presort_word, self.code_map.keys()))`
			`pairs.sort()`
			`- words = map(lambda pair: pair[1], pairs)`
			`+ words = list(map(lambda pair: pair[1], pairs))`
			`pairs = None`
			`if run.verbose:`
			`sys.stdout.write(' %d of them\n' % len(words))`
			`count = len(words)`
			`- singles = (255 * 255 - count) / 254`
			`+ singles = (255 * 255 - count) // 254`
			`# Transmit a few values for further usage by the C code.`
			`if run.verbose:`
			`sys.stdout.write(' sorting names...')`
			`- ucs2_table = self.charname_map.keys()`
			`+ ucs2_table = list(self.charname_map.keys())`
			`ucs2_table.sort()`
			`if run.verbose:`
			`sys.stdout.write(' %d of them\n' % len(ucs2_table))`
			`@@ -366,12 +381,14 @@ class Charnames(Options):`
			`word = words[counter]`
			`write(' %-28s/* \\%0.3o */\n'`
			`% ('"%s",' % re.sub('"', r'\"', word), char1))`
			`+ assert isinstance(word, str)`
			`self.code_map[words[counter]] = char1`
			`char1 += 1`
			`for counter in range(singles, count):`
			`word = words[counter]`
			`write(' %-28s/* \\%0.3o\\%0.3o */\n'`
			`% ('"%s",' % re.sub('"', r'\"', word, 1), char1, char2))`
			`+ assert isinstance(word, str)`
			`self.code_map[words[counter]] = 256 * char1 + char2`
			`if char2 == 255:`
			`char1 += 1`
			`@@ -397,7 +414,7 @@ class Charnames(Options):`
			`if code < 256:`
			`write('\\%0.3o' % code)`
			`else:`
			`- write('\\%0.3o\\%0.3o' % (code / 256, code % 256))`
			`+ write('\\%0.3o\\%0.3o' % (code // 256, code % 256))`
			`else:`
			`sys.stdout.write('??? %s\n' % word)`
			`write('"},\n')`
			`@@ -540,7 +557,7 @@ class Mnemonics(Options):`
			`continue`
			`if len(line) == 3:`
			`continue`
			`- if input.begins(' \.\.\.'):`
			`+ if input.begins(' \\.\\.\\.'):`
			`continue`
			`if line == ' Presentation forms\n':`
			`continue`
			`@@ -667,7 +684,7 @@ class Mnemonics(Options):`
			`'static const struct entry table[TABLE_LENGTH] =\n'`
			`' {\n')`
			`count = 0`
			`- indices = self.mnemonic_map.keys()`
			`+ indices = list(self.mnemonic_map.keys())`
			`indices.sort()`
			`for ucs2 in indices:`
			`text = self.mnemonic_map[ucs2]`
			`@@ -681,7 +698,7 @@ class Mnemonics(Options):`
			`'static const unsigned short inverse[TABLE_LENGTH] =\n'`
			`' {')`
			`count = 0`
			`- keys = inverse_map.keys()`
			`+ keys = list(inverse_map.keys())`
			`keys.sort()`
			`for text in keys:`
			`if count % 10 == 0:`
			`@@ -744,7 +761,7 @@ class Strips(Options):`
			`def digest_rfc1345(self, input):`
			`self.init_write_data()`
			`# Informal canonical order of presentation.`
			`- CHARSET, REM, ALIAS, ESC, BITS, CODE = range(6)`
			`+ CHARSET, REM, ALIAS, ESC, BITS, CODE = list(range(6))`
			`charset = None`
			`skip = False`
			`while True:`
			`@@ -956,7 +973,7 @@ class Strips(Options):`
			`if input.search('\032'):`
			`# Old MS-DOS C-z !!`
			`break`
			`- match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t\#')`
			`+ match = input.match('0x([0-9A-F]+)\t0x([0-9A-F]+)\t#')`
			`if match:`
			`self.table[int(match.group(1), 16)] = int(match.group(2), 16)`
			`else:`
			`@@ -1125,7 +1142,7 @@ class Strips(Options):`
			`write = Output('fr-%s' % self.TEXINFO, noheader=True).write`
			`else:`
			`write = Output(self.TEXINFO, noheader=True).write`
			`- charsets = self.remark_map.keys()`
			`+ charsets = list(self.remark_map.keys())`
			`charsets.sort()`
			`for charset in charsets:`
			`write('\n'`
			`@@ -1161,12 +1178,12 @@ class Input:`

			`def __init__(self, name):`
			`self.name = name`
			`- self.input = file(name)`
			`+ self.input = open(name, "rb")`
			`self.line_count = 0`
			`sys.stdout.write("Reading %s\n" % name)`

			`def readline(self):`
			`- self.line = self.input.readline()`
			`+ self.line = to_unicode(self.input.readline())`
			`self.line_count += 1`
			`return self.line`

			`@@ -1184,16 +1201,16 @@ class Input:`
			`return self.line[:len(text)] == text`

			`def match(self, pattern):`
			`- return re.match(pattern, self.line)`
			`+ return re.match(pattern, to_unicode(self.line))`

			`def search(self, pattern):`
			`- return re.search(pattern, self.line)`
			`+ return re.search(pattern, to_unicode(self.line))`

			`class Output:`

			`def __init__(self, name, noheader=False):`
			`self.name = name`
			`- self.write = file(name, 'w').write`
			`+ self.write = open(name, 'w').write`
			`sys.stdout.write("Writing %s\n" % name)`
			`if not noheader:`
			`self.write("""\`
			`--`
			`2.21.0`