295 lines
9.3 KiB
Diff
295 lines
9.3 KiB
Diff
From 11dfb3b72c7128e05e8608ff501d06b80a2788c1 Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Mon, 26 Dec 2011 14:56:09 +0800
|
|
Subject: [PATCH 1/7] fixes pinyin_translate_token
|
|
|
|
---
|
|
src/pinyin.cpp | 4 ++--
|
|
1 files changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
|
|
index f4a20fe..a3fd37d 100644
|
|
--- a/src/pinyin.cpp
|
|
+++ b/src/pinyin.cpp
|
|
@@ -590,11 +590,11 @@ bool pinyin_translate_token(pinyin_instance_t * instance,
|
|
PhraseItem item;
|
|
utf16_t buffer[MAX_PHRASE_LENGTH];
|
|
|
|
- bool retval = context->m_phrase_index->get_phrase_item(token, item);
|
|
+ int retval = context->m_phrase_index->get_phrase_item(token, item);
|
|
item.get_phrase_string(buffer);
|
|
guint8 length = item.get_phrase_length();
|
|
*word = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
|
|
- return retval;
|
|
+ return retval == ERROR_OK;
|
|
}
|
|
|
|
bool pinyin_train(pinyin_instance_t * instance){
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From 49869f6917edf488f0daca22e32a8166cf6e0325 Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Mon, 26 Dec 2011 15:17:46 +0800
|
|
Subject: [PATCH 2/7] increase train factor
|
|
|
|
---
|
|
src/lookup/pinyin_lookup.cpp | 2 +-
|
|
src/pinyin.cpp | 2 +-
|
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
|
|
index a01b28a..efad86f 100644
|
|
--- a/src/lookup/pinyin_lookup.cpp
|
|
+++ b/src/lookup/pinyin_lookup.cpp
|
|
@@ -446,7 +446,7 @@ bool PinyinLookup::train_result(ChewingKeyVector keys, CandidateConstraints cons
|
|
//TODO: verify the new training method.
|
|
phrase_token_t last_token = sentence_start;
|
|
// constraints->len + 1 == results->len
|
|
- const guint32 train_factor = 23 * 5;
|
|
+ const guint32 train_factor = 23 * 25;
|
|
for ( size_t i = 0; i < constraints->len; ++i){
|
|
phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
|
|
if ( *token == null_token )
|
|
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
|
|
index a3fd37d..5a1b683 100644
|
|
--- a/src/pinyin.cpp
|
|
+++ b/src/pinyin.cpp
|
|
@@ -594,7 +594,7 @@ bool pinyin_translate_token(pinyin_instance_t * instance,
|
|
item.get_phrase_string(buffer);
|
|
guint8 length = item.get_phrase_length();
|
|
*word = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
|
|
- return retval == ERROR_OK;
|
|
+ return ERROR_OK == retval;
|
|
}
|
|
|
|
bool pinyin_train(pinyin_instance_t * instance){
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From 2fd2eea102bfa32d662dca823bf1cfeed3a94c1c Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Mon, 26 Dec 2011 18:34:49 +0800
|
|
Subject: [PATCH 3/7] fixes parallel make
|
|
|
|
---
|
|
data/Makefile.am | 10 ++++++++--
|
|
1 files changed, 8 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/data/Makefile.am b/data/Makefile.am
|
|
index 25e4683..6f74f4e 100644
|
|
--- a/data/Makefile.am
|
|
+++ b/data/Makefile.am
|
|
@@ -34,16 +34,22 @@ libpinyin_dbdir = $(pkgdatadir)/data
|
|
|
|
CLEANFILES = $(binary_model_data)
|
|
|
|
-$(textual_model_data):
|
|
+model.text.tar.gz:
|
|
wget https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
|
|
+
|
|
+interpolation.text: model.text.tar.gz
|
|
tar xvf model.text.tar.gz
|
|
|
|
-$(binary_model_data): $(textual_model_data)
|
|
+gb_char.table gbk_char.table: interpolation.text
|
|
+
|
|
+bigram.db: $(textual_model_data)
|
|
$(RM) $(binary_model_data)
|
|
../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data
|
|
../utils/storage/import_interpolation < $(top_srcdir)/data/interpolation.text
|
|
../utils/training/gen_unigram
|
|
|
|
+gb_char.bin gbk_char.bin phrase_index.bin pinyin_index.bin: bigram.db
|
|
+
|
|
rebuild:
|
|
git reset --hard
|
|
sed -i -e "s/#define LAMBDA_PARAMETER 0.588792/#define LAMBDA_PARAMETER $(LAMBDA_PARAMETER)/" ../src/include/novel_types.h
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From 13aac5af9c6e3f3a2e243806f102e7477b686c91 Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Tue, 27 Dec 2011 11:09:06 +0800
|
|
Subject: [PATCH 4/7] fixes data/Makefile.am
|
|
|
|
---
|
|
data/Makefile.am | 4 +---
|
|
1 files changed, 1 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/data/Makefile.am b/data/Makefile.am
|
|
index 6f74f4e..2964bb9 100644
|
|
--- a/data/Makefile.am
|
|
+++ b/data/Makefile.am
|
|
@@ -34,10 +34,8 @@ libpinyin_dbdir = $(pkgdatadir)/data
|
|
|
|
CLEANFILES = $(binary_model_data)
|
|
|
|
-model.text.tar.gz:
|
|
+interpolation.text:
|
|
wget https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
|
|
-
|
|
-interpolation.text: model.text.tar.gz
|
|
tar xvf model.text.tar.gz
|
|
|
|
gb_char.table gbk_char.table: interpolation.text
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From be97324565a43072744dc73a21ee4a2ae18f786c Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Thu, 29 Dec 2011 10:50:21 +0800
|
|
Subject: [PATCH 5/7] update ChangeLog
|
|
|
|
---
|
|
ChangeLog | 14 ++++++++++++--
|
|
1 files changed, 12 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/ChangeLog b/ChangeLog
|
|
index fed9c34..5c0237b 100644
|
|
--- a/ChangeLog
|
|
+++ b/ChangeLog
|
|
@@ -1,2 +1,12 @@
|
|
-2010-08-03 version 0.2.99 <alexepico@gmail.com>
|
|
-* Import from pinyin.
|
|
+version 0.4.91
|
|
+* New parsers for full pinyin/double pinyin/chewing.
|
|
+ * libpinyin now fully supports all pinyin auto corrections in
|
|
+ibus-pinyin;
|
|
+ * libpinyin now better supports an/ang, en/eng, in/ing fuzzy
|
|
+pinyin match.
|
|
+
|
|
+version 0.3.0
|
|
+* the first official release of 0.3.x.
|
|
+
|
|
+version 0.2.99
|
|
+* import from pinyin.
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From d3eda7ee46fdab4f4b5bf3cf4e0e2ab1db447b9b Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Thu, 29 Dec 2011 10:52:03 +0800
|
|
Subject: [PATCH 6/7] update libpinyin.spec.in
|
|
|
|
---
|
|
libpinyin.spec.in | 2 +-
|
|
1 files changed, 1 insertions(+), 1 deletions(-)
|
|
|
|
diff --git a/libpinyin.spec.in b/libpinyin.spec.in
|
|
index c0d50d9..39f52bc 100644
|
|
--- a/libpinyin.spec.in
|
|
+++ b/libpinyin.spec.in
|
|
@@ -38,7 +38,7 @@ The %{name}-data package contains data files.
|
|
|
|
%build
|
|
%configure --disable-static
|
|
-make
|
|
+make %{?_smp_mflags}
|
|
|
|
|
|
%install
|
|
--
|
|
1.7.7.4
|
|
|
|
|
|
From 9f0f08875bfc4125a0b4b8a7a7b31314d7c876d8 Mon Sep 17 00:00:00 2001
|
|
From: Peng Wu <alexepico@gmail.com>
|
|
Date: Thu, 29 Dec 2011 11:01:55 +0800
|
|
Subject: [PATCH 7/7] update pinyin.cpp
|
|
|
|
---
|
|
src/pinyin.cpp | 22 +++++++++++-----------
|
|
1 files changed, 11 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
|
|
index 5a1b683..3663904 100644
|
|
--- a/src/pinyin.cpp
|
|
+++ b/src/pinyin.cpp
|
|
@@ -29,8 +29,8 @@
|
|
struct _pinyin_context_t{
|
|
pinyin_option_t m_options;
|
|
|
|
- FullPinyinParser2 * m_default_parser;
|
|
- DoublePinyinParser2 * m_shuang_pin_parser;
|
|
+ FullPinyinParser2 * m_full_pinyin_parser;
|
|
+ DoublePinyinParser2 * m_double_pinyin_parser;
|
|
ChewingParser2 * m_chewing_parser;
|
|
|
|
ChewingLargeTable * m_pinyin_table;
|
|
@@ -117,8 +117,8 @@ pinyin_context_t * pinyin_init(const char * systemdir, const char * userdir){
|
|
g_free(filename);
|
|
context->m_pinyin_table->load(chunk);
|
|
|
|
- context->m_default_parser = new FullPinyinParser2;
|
|
- context->m_shuang_pin_parser = new DoublePinyinParser2;
|
|
+ context->m_full_pinyin_parser = new FullPinyinParser2;
|
|
+ context->m_double_pinyin_parser = new DoublePinyinParser2;
|
|
context->m_chewing_parser = new ChewingParser2;
|
|
|
|
context->m_phrase_table = new PhraseLargeTable;
|
|
@@ -226,7 +226,7 @@ bool pinyin_save(pinyin_context_t * context){
|
|
|
|
bool pinyin_set_double_pinyin_scheme(pinyin_context_t * context,
|
|
DoublePinyinScheme scheme){
|
|
- context->m_shuang_pin_parser->set_scheme(scheme);
|
|
+ context->m_double_pinyin_parser->set_scheme(scheme);
|
|
return true;
|
|
}
|
|
|
|
@@ -238,8 +238,8 @@ bool pinyin_set_chewing_scheme(pinyin_context_t * context,
|
|
|
|
|
|
void pinyin_fini(pinyin_context_t * context){
|
|
- delete context->m_default_parser;
|
|
- delete context->m_shuang_pin_parser;
|
|
+ delete context->m_full_pinyin_parser;
|
|
+ delete context->m_double_pinyin_parser;
|
|
delete context->m_chewing_parser;
|
|
delete context->m_pinyin_table;
|
|
delete context->m_phrase_table;
|
|
@@ -358,7 +358,7 @@ bool pinyin_parse_full_pinyin(pinyin_instance_t * instance,
|
|
pinyin_context_t * & context = instance->m_context;
|
|
|
|
int pinyin_len = strlen(onepinyin);
|
|
- int parse_len = context->m_default_parser->parse_one_key
|
|
+ int parse_len = context->m_full_pinyin_parser->parse_one_key
|
|
( context->m_options, *onekey, onepinyin, pinyin_len);
|
|
return pinyin_len == parse_len;
|
|
}
|
|
@@ -368,7 +368,7 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
|
|
pinyin_context_t * & context = instance->m_context;
|
|
int pinyin_len = strlen(pinyins);
|
|
|
|
- int parse_len = context->m_default_parser->parse
|
|
+ int parse_len = context->m_full_pinyin_parser->parse
|
|
( context->m_options, instance->m_pinyin_keys,
|
|
instance->m_pinyin_key_rests, pinyins, pinyin_len);
|
|
|
|
@@ -381,7 +381,7 @@ bool pinyin_parse_double_pinyin(pinyin_instance_t * instance,
|
|
pinyin_context_t * & context = instance->m_context;
|
|
|
|
int pinyin_len = strlen(onepinyin);
|
|
- int parse_len = context->m_shuang_pin_parser->parse_one_key
|
|
+ int parse_len = context->m_double_pinyin_parser->parse_one_key
|
|
( context->m_options, *onekey, onepinyin, pinyin_len);
|
|
return pinyin_len == parse_len;
|
|
}
|
|
@@ -391,7 +391,7 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
|
|
pinyin_context_t * & context = instance->m_context;
|
|
int pinyin_len = strlen(pinyins);
|
|
|
|
- int parse_len = context->m_shuang_pin_parser->parse
|
|
+ int parse_len = context->m_double_pinyin_parser->parse
|
|
( context->m_options, instance->m_pinyin_keys,
|
|
instance->m_pinyin_key_rests, pinyins, pinyin_len);
|
|
|
|
--
|
|
1.7.7.4
|
|
|