update to 0.4.91
This commit is contained in:
parent
27618325a2
commit
26543fafc3
@@ -1,86 +0,0 @@
|
||||
From f332a01334342bdd4169324bdf889386ff3676fa Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Thu, 24 Nov 2011 13:02:10 +0800
|
||||
Subject: [PATCH 1/3] increase train_factor because of larger model data
|
||||
|
||||
---
|
||||
src/lookup/pinyin_lookup.cpp | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
|
||||
index d6ba68c..7146e51 100644
|
||||
--- a/src/lookup/pinyin_lookup.cpp
|
||||
+++ b/src/lookup/pinyin_lookup.cpp
|
||||
@@ -449,7 +449,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
|
||||
//TODO: verify the new training method.
|
||||
phrase_token_t last_token = sentence_start;
|
||||
// constraints->len + 1 == results->len
|
||||
- guint32 train_factor = 23;
|
||||
+ guint32 train_factor = 23 * 5;
|
||||
for ( size_t i = 0; i < constraints->len; ++i){
|
||||
phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
|
||||
if ( *token == null_token )
|
||||
@@ -466,7 +466,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
|
||||
//std::cout<<"i:"<<i<<"last_token:"<<last_token<<"\ttoken:"<<*token<<std::endl;
|
||||
m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
|
||||
m_cache_phrase_item.increase_pinyin_possibility(*m_custom, pinyin_keys + i, train_factor);
|
||||
- m_phrase_index->add_unigram_frequency(*token, train_factor);
|
||||
+ m_phrase_index->add_unigram_frequency(*token, train_factor * 10);
|
||||
if ( last_token ){
|
||||
SingleGram * system, *user;
|
||||
m_system_bigram->load(last_token, system);
|
||||
--
|
||||
1.7.7.3
|
||||
|
||||
|
||||
From de8057576011eb536d87194da10c9ec48dd8d092 Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Fri, 25 Nov 2011 14:58:45 +0800
|
||||
Subject: [PATCH 2/3] add const modifiers to train factor
|
||||
|
||||
---
|
||||
src/lookup/pinyin_lookup.cpp | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
|
||||
index 7146e51..e2f563c 100644
|
||||
--- a/src/lookup/pinyin_lookup.cpp
|
||||
+++ b/src/lookup/pinyin_lookup.cpp
|
||||
@@ -449,7 +449,7 @@ bool PinyinLookup::train_result(PinyinKeyVector keys, CandidateConstraints const
|
||||
//TODO: verify the new training method.
|
||||
phrase_token_t last_token = sentence_start;
|
||||
// constraints->len + 1 == results->len
|
||||
- guint32 train_factor = 23 * 5;
|
||||
+ const guint32 train_factor = 23 * 5;
|
||||
for ( size_t i = 0; i < constraints->len; ++i){
|
||||
phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
|
||||
if ( *token == null_token )
|
||||
--
|
||||
1.7.7.3
|
||||
|
||||
|
||||
From 47dca981b4d0f155f80087ee892bd2ff80429e7c Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Fri, 25 Nov 2011 15:18:42 +0800
|
||||
Subject: [PATCH 3/3] update lambda parameter
|
||||
|
||||
---
|
||||
src/include/novel_types.h | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/src/include/novel_types.h b/src/include/novel_types.h
|
||||
index 1c4fb2b..110d041 100644
|
||||
--- a/src/include/novel_types.h
|
||||
+++ b/src/include/novel_types.h
|
||||
@@ -144,7 +144,7 @@ typedef guint32 table_offset_t;
|
||||
|
||||
typedef double parameter_t;
|
||||
|
||||
-#define LAMBDA_PARAMETER 0.588792
|
||||
+#define LAMBDA_PARAMETER 0.330642
|
||||
|
||||
/* Array of phrase_token_t */
|
||||
typedef GArray * TokenVector;
|
||||
--
|
||||
1.7.7.3
|
||||
|
||||
141
libpinyin-0.5.x-head.patch
Normal file
141
libpinyin-0.5.x-head.patch
Normal file
@@ -0,0 +1,141 @@
|
||||
From 11dfb3b72c7128e05e8608ff501d06b80a2788c1 Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Mon, 26 Dec 2011 14:56:09 +0800
|
||||
Subject: [PATCH 1/4] fixes pinyin_translate_token
|
||||
|
||||
---
|
||||
src/pinyin.cpp | 4 ++--
|
||||
1 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
|
||||
index f4a20fe..a3fd37d 100644
|
||||
--- a/src/pinyin.cpp
|
||||
+++ b/src/pinyin.cpp
|
||||
@@ -590,11 +590,11 @@ bool pinyin_translate_token(pinyin_instance_t * instance,
|
||||
PhraseItem item;
|
||||
utf16_t buffer[MAX_PHRASE_LENGTH];
|
||||
|
||||
- bool retval = context->m_phrase_index->get_phrase_item(token, item);
|
||||
+ int retval = context->m_phrase_index->get_phrase_item(token, item);
|
||||
item.get_phrase_string(buffer);
|
||||
guint8 length = item.get_phrase_length();
|
||||
*word = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
|
||||
- return retval;
|
||||
+ return retval == ERROR_OK;
|
||||
}
|
||||
|
||||
bool pinyin_train(pinyin_instance_t * instance){
|
||||
--
|
||||
1.7.7.4
|
||||
|
||||
|
||||
From 49869f6917edf488f0daca22e32a8166cf6e0325 Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Mon, 26 Dec 2011 15:17:46 +0800
|
||||
Subject: [PATCH 2/4] increase train factor
|
||||
|
||||
---
|
||||
src/lookup/pinyin_lookup.cpp | 2 +-
|
||||
src/pinyin.cpp | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/lookup/pinyin_lookup.cpp b/src/lookup/pinyin_lookup.cpp
|
||||
index a01b28a..efad86f 100644
|
||||
--- a/src/lookup/pinyin_lookup.cpp
|
||||
+++ b/src/lookup/pinyin_lookup.cpp
|
||||
@@ -446,7 +446,7 @@ bool PinyinLookup::train_result(ChewingKeyVector keys, CandidateConstraints cons
|
||||
//TODO: verify the new training method.
|
||||
phrase_token_t last_token = sentence_start;
|
||||
// constraints->len + 1 == results->len
|
||||
- const guint32 train_factor = 23 * 5;
|
||||
+ const guint32 train_factor = 23 * 25;
|
||||
for ( size_t i = 0; i < constraints->len; ++i){
|
||||
phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
|
||||
if ( *token == null_token )
|
||||
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
|
||||
index a3fd37d..5a1b683 100644
|
||||
--- a/src/pinyin.cpp
|
||||
+++ b/src/pinyin.cpp
|
||||
@@ -594,7 +594,7 @@ bool pinyin_translate_token(pinyin_instance_t * instance,
|
||||
item.get_phrase_string(buffer);
|
||||
guint8 length = item.get_phrase_length();
|
||||
*word = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
|
||||
- return retval == ERROR_OK;
|
||||
+ return ERROR_OK == retval;
|
||||
}
|
||||
|
||||
bool pinyin_train(pinyin_instance_t * instance){
|
||||
--
|
||||
1.7.7.4
|
||||
|
||||
|
||||
From 2fd2eea102bfa32d662dca823bf1cfeed3a94c1c Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Mon, 26 Dec 2011 18:34:49 +0800
|
||||
Subject: [PATCH 3/4] fixes parallel make
|
||||
|
||||
---
|
||||
data/Makefile.am | 10 ++++++++--
|
||||
1 files changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/data/Makefile.am b/data/Makefile.am
|
||||
index 25e4683..6f74f4e 100644
|
||||
--- a/data/Makefile.am
|
||||
+++ b/data/Makefile.am
|
||||
@@ -34,16 +34,22 @@ libpinyin_dbdir = $(pkgdatadir)/data
|
||||
|
||||
CLEANFILES = $(binary_model_data)
|
||||
|
||||
-$(textual_model_data):
|
||||
+model.text.tar.gz:
|
||||
wget https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
|
||||
+
|
||||
+interpolation.text: model.text.tar.gz
|
||||
tar xvf model.text.tar.gz
|
||||
|
||||
-$(binary_model_data): $(textual_model_data)
|
||||
+gb_char.table gbk_char.table: interpolation.text
|
||||
+
|
||||
+bigram.db: $(textual_model_data)
|
||||
$(RM) $(binary_model_data)
|
||||
../utils/storage/gen_binary_files --table-dir $(top_srcdir)/data
|
||||
../utils/storage/import_interpolation < $(top_srcdir)/data/interpolation.text
|
||||
../utils/training/gen_unigram
|
||||
|
||||
+gb_char.bin gbk_char.bin phrase_index.bin pinyin_index.bin: bigram.db
|
||||
+
|
||||
rebuild:
|
||||
git reset --hard
|
||||
sed -i -e "s/#define LAMBDA_PARAMETER 0.588792/#define LAMBDA_PARAMETER $(LAMBDA_PARAMETER)/" ../src/include/novel_types.h
|
||||
--
|
||||
1.7.7.4
|
||||
|
||||
|
||||
From 13aac5af9c6e3f3a2e243806f102e7477b686c91 Mon Sep 17 00:00:00 2001
|
||||
From: Peng Wu <alexepico@gmail.com>
|
||||
Date: Tue, 27 Dec 2011 11:09:06 +0800
|
||||
Subject: [PATCH 4/4] fixes data/Makefile.am
|
||||
|
||||
---
|
||||
data/Makefile.am | 4 +---
|
||||
1 files changed, 1 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/data/Makefile.am b/data/Makefile.am
|
||||
index 6f74f4e..2964bb9 100644
|
||||
--- a/data/Makefile.am
|
||||
+++ b/data/Makefile.am
|
||||
@@ -34,10 +34,8 @@ libpinyin_dbdir = $(pkgdatadir)/data
|
||||
|
||||
CLEANFILES = $(binary_model_data)
|
||||
|
||||
-model.text.tar.gz:
|
||||
+interpolation.text:
|
||||
wget https://github.com/downloads/libpinyin/libpinyin/model.text.tar.gz
|
||||
-
|
||||
-interpolation.text: model.text.tar.gz
|
||||
tar xvf model.text.tar.gz
|
||||
|
||||
gb_char.table gbk_char.table: interpolation.text
|
||||
--
|
||||
1.7.7.4
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
Name: libpinyin
|
||||
-Version: 0.3.0
|
||||
-Release: 2%{?dist}
|
||||
+Version: 0.4.91
|
||||
+Release: 1%{?dist}
|
||||
Summary: Library to deal with pinyin
|
||||
|
||||
License: GPLv2+
|
||||
URL: https://github.com/libpinyin/libpinyin
|
||||
Source0: https://github.com/downloads/libpinyin/libpinyin/%{name}-%{version}.tar.gz
|
||||
-Patch0: libpinyin-0.3.x-head.patch
|
||||
+Patch0: libpinyin-0.5.x-head.patch
|
||||
|
||||
BuildRequires: db4-devel, glib2-devel
|
||||
Requires: %{name}-data = %{version}-%{release}
|
||||
@@ -70,6 +70,9 @@ find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';'
|
||||
%{_datadir}/libpinyin/data
|
||||
|
||||
%changelog
|
||||
* Tue Dec 27 2011 Peng Wu <pwu@redhat.com> - 0.4.91-1
|
||||
- Update to 0.4.91
|
||||
|
||||
* Fri Nov 25 2011 Peng Wu <pwu@redhat.com> - 0.3.0-2
|
||||
- Increase train factor
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user