diff --git a/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch b/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch new file mode 100644 index 0000000..4e4ec8c --- /dev/null +++ b/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch @@ -0,0 +1,69 @@ +From 7a2eed5fb33098b10d67958472a23bb41a3f7b11 Mon Sep 17 00:00:00 2001 +From: Mike FABIAN +Date: Thu, 13 Sep 2012 12:43:55 +0200 +Subject: [PATCH] Improve check whether a phrase is simplified or traditional + Chinese + +The improvement is to ignore all non-Han characters when +doing the check. + +This is to avoid classifying a simplified Chinese string as +traditional just because it happens to include some non-Chinese +characters, for example box drawing characters, which cannot be +converted to gb2312 but happen to be convertible to big5hkscs. + +This fixes the problem in the emoji-table input method that most +phrases cannot be input at all. + +See: https://bugzilla.redhat.com/show_bug.cgi?id=856320 +--- + engine/tabsqlitedb.py | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/engine/tabsqlitedb.py b/engine/tabsqlitedb.py +index 7606afb..9e3b50c 100644 +--- a/engine/tabsqlitedb.py ++++ b/engine/tabsqlitedb.py +@@ -483,17 +483,27 @@ class tabsqlitedb: + # make sure that we got a unicode string + if type(phrase) != type(u''): + phrase = phrase.decode('utf8') ++ tmp_phrase = ''.join(re.findall(u'[' ++ + u'\u4E00-\u9FCB' ++ + u'\u3400-\u4DB5' ++ + u'\uF900-\uFaFF' ++ + u'\U00020000-\U0002A6D6' ++ + u'\U0002A700-\U0002B734' ++ + u'\U0002B740-\U0002B81D' ++ + u'\U0002F800-\U0002FA1D' ++ + u']+', ++ phrase)) + # first whether in gb2312 + try: +- phrase.encode('gb2312') ++ tmp_phrase.encode('gb2312') + category |= 1 + except: +- if '〇'.decode('utf8') in phrase: ++ if '〇'.decode('utf8') in tmp_phrase: + # we add '〇' into SC as well + category |= 1 + # second check big5-hkscs + try: +- phrase.encode('big5hkscs') ++ tmp_phrase.encode('big5hkscs') + category |= 1 << 1 + except: + # then check whether in gbk, +@@ -503,7 +513,7 @@ class tabsqlitedb: + else: + # need to check + try: +- phrase.encode('gbk') ++ tmp_phrase.encode('gbk') + category |= 1 + except: + # not in gbk +-- +1.7.11.4 + diff --git a/ibus-table.spec b/ibus-table.spec index 816cc3c..7f1a946 100644 --- a/ibus-table.spec +++ b/ibus-table.spec @@ -1,12 +1,13 @@ Name: ibus-table Version: 1.4.99.20120907 -Release: 1%{?dist} +Release: 2%{?dist} Summary: The Table engine for IBus platform License: LGPLv2+ Group: System Environment/Libraries URL: http://code.google.com/p/ibus/ Source0: http://mfabian.fedorapeople.org/ibus-table/%{name}-%{version}.tar.gz Patch1: ibus-table-1.3.9.20110827-uppercase-umlauts.patch +Patch2: ibus-table-1.4.99.20120907-improve-chinese-category-check.patch Requires: ibus > 1.3.0 BuildRequires: ibus-devel > 1.3.0 @@ -30,6 +31,7 @@ Development files for %{name}. %prep %setup -q %patch1 -p1 -b .uppercase-umlauts +%patch2 -p1 -b .improve-chinese-category-check %build %configure --disable-static --disable-additional @@ -100,6 +102,18 @@ Development files for %{name}. %{_datadir}/pkgconfig/%{name}.pc %changelog +* Tue Sep 11 2012 Mike FABIAN - 1.4.99.20120907-2 +- Resolves: #856320 +- Improve check whether a phrase is simplified or traditional Chinese + The improvement is to ignore all non-Han characters when + doing the check. + This is to avoid classifying a simplified Chinese string as + traditional just because it happens to include some non-Chinese + characters, for example box drawing characters, which cannot be + converted to gb2312 but happen to be convertible to big5hkscs. + This fixes the problem in the emoji-table input method that most + phrases cannot be input at all. + * Fri Sep 07 2012 Mike FABIAN - 1.4.99.20120907-1 - Relates: #855250 - see comment#1 in #855250