add 8bit dictionary word chars to the unicode list
This commit is contained in:
parent
ebefbaa051
commit
0a1b5e5742
@ -1,203 +0,0 @@
|
|||||||
--- /dev/null 2007-03-16 11:16:34.774869017 +0000
|
|
||||||
+++ hunspell-1.1.5/src/tools/munch.h 2007-03-20 09:19:49.000000000 +0000
|
|
||||||
@@ -0,0 +1,121 @@
|
|
||||||
+/* munch header file */
|
|
||||||
+
|
|
||||||
+#define MAX_LN_LEN 200
|
|
||||||
+#define MAX_WD_LEN 200
|
|
||||||
+#define MAX_PREFIXES 256
|
|
||||||
+#define MAX_SUFFIXES 256
|
|
||||||
+#define MAX_ROOTS 20
|
|
||||||
+#define MAX_WORDS 5000
|
|
||||||
+
|
|
||||||
+#define ROTATE_LEN 5
|
|
||||||
+
|
|
||||||
+#define ROTATE(v,q) \
|
|
||||||
+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
|
|
||||||
+
|
|
||||||
+#define SET_SIZE 256
|
|
||||||
+
|
|
||||||
+#define XPRODUCT (1 << 0)
|
|
||||||
+
|
|
||||||
+/* the affix table entry */
|
|
||||||
+
|
|
||||||
+struct affent
|
|
||||||
+{
|
|
||||||
+ char * appnd;
|
|
||||||
+ char * strip;
|
|
||||||
+ short appndl;
|
|
||||||
+ short stripl;
|
|
||||||
+ char achar;
|
|
||||||
+ char xpflg;
|
|
||||||
+ short numconds;
|
|
||||||
+ char conds[SET_SIZE];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+struct affixptr
|
|
||||||
+{
|
|
||||||
+ struct affent * aep;
|
|
||||||
+ int num;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* the prefix and suffix table */
|
|
||||||
+int numpfx; /* Number of prefixes in table */
|
|
||||||
+int numsfx; /* Number of suffixes in table */
|
|
||||||
+
|
|
||||||
+/* the prefix table */
|
|
||||||
+struct affixptr ptable[MAX_PREFIXES];
|
|
||||||
+
|
|
||||||
+/* the suffix table */
|
|
||||||
+struct affixptr stable[MAX_SUFFIXES];
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+/* data structure to store results of lookups */
|
|
||||||
+struct matches
|
|
||||||
+{
|
|
||||||
+ struct hentry * hashent; /* hash table entry */
|
|
||||||
+ struct affent * prefix; /* Prefix used, or NULL */
|
|
||||||
+ struct affent * suffix; /* Suffix used, or NULL */
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int numroots; /* number of root words found */
|
|
||||||
+struct matches roots[MAX_ROOTS]; /* list of root words found */
|
|
||||||
+
|
|
||||||
+/* hashing stuff */
|
|
||||||
+
|
|
||||||
+struct hentry
|
|
||||||
+{
|
|
||||||
+ char * word;
|
|
||||||
+ char * affstr;
|
|
||||||
+ struct hentry * next;
|
|
||||||
+ int keep;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+int tablesize;
|
|
||||||
+struct hentry * tableptr;
|
|
||||||
+
|
|
||||||
+/* unmunch stuff */
|
|
||||||
+
|
|
||||||
+int numwords; /* number of words found */
|
|
||||||
+struct dwords
|
|
||||||
+{
|
|
||||||
+ char * word;
|
|
||||||
+ int pallow;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct dwords wlist[MAX_WORDS]; /* list words found */
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+/* the routines */
|
|
||||||
+
|
|
||||||
+int parse_aff_file(FILE* afflst);
|
|
||||||
+
|
|
||||||
+void encodeit(struct affent * ptr, char * cs);
|
|
||||||
+
|
|
||||||
+int load_tables(FILE * wrdlst);
|
|
||||||
+
|
|
||||||
+int hash(const char *);
|
|
||||||
+
|
|
||||||
+int add_word(char *);
|
|
||||||
+
|
|
||||||
+struct hentry * lookup(const char *);
|
|
||||||
+
|
|
||||||
+void aff_chk (const char * word, int len);
|
|
||||||
+
|
|
||||||
+void pfx_chk (const char * word, int len, struct affent* ep, int num);
|
|
||||||
+
|
|
||||||
+void suf_chk (const char * word, int len, struct affent * ep, int num,
|
|
||||||
+ struct affent * pfxent, int cpflag);
|
|
||||||
+
|
|
||||||
+void add_affix_char(struct hentry * hent, char ac);
|
|
||||||
+
|
|
||||||
+int expand_rootword(const char *, int, const char*, int);
|
|
||||||
+
|
|
||||||
+void pfx_add (const char * word, int len, struct affent* ep, int num);
|
|
||||||
+
|
|
||||||
+void suf_add (const char * word, int len, struct affent * ep, int num);
|
|
||||||
+
|
|
||||||
+char * mystrsep(char ** stringp, const char delim);
|
|
||||||
+
|
|
||||||
+char * mystrdup(const char * s);
|
|
||||||
+
|
|
||||||
+void mychomp(char * s);
|
|
||||||
--- /dev/null 2007-03-16 11:16:34.774869017 +0000
|
|
||||||
+++ hunspell-1.1.5/src/tools/unmunch.h 2007-03-20 09:19:50.000000000 +0000
|
|
||||||
@@ -0,0 +1,76 @@
|
|
||||||
+/* unmunch header file */
|
|
||||||
+
|
|
||||||
+#define MAX_LN_LEN 200
|
|
||||||
+#define MAX_WD_LEN 200
|
|
||||||
+#define MAX_PREFIXES 256
|
|
||||||
+#define MAX_SUFFIXES 256
|
|
||||||
+#define MAX_WORDS 5000
|
|
||||||
+
|
|
||||||
+#define ROTATE_LEN 5
|
|
||||||
+
|
|
||||||
+#define ROTATE(v,q) \
|
|
||||||
+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
|
|
||||||
+
|
|
||||||
+#define SET_SIZE 256
|
|
||||||
+
|
|
||||||
+#define XPRODUCT (1 << 0)
|
|
||||||
+
|
|
||||||
+/* the affix table entry */
|
|
||||||
+
|
|
||||||
+struct affent
|
|
||||||
+{
|
|
||||||
+ char * appnd;
|
|
||||||
+ char * strip;
|
|
||||||
+ short appndl;
|
|
||||||
+ short stripl;
|
|
||||||
+ char achar;
|
|
||||||
+ char xpflg;
|
|
||||||
+ short numconds;
|
|
||||||
+ char conds[SET_SIZE];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+struct affixptr
|
|
||||||
+{
|
|
||||||
+ struct affent * aep;
|
|
||||||
+ int num;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* the prefix and suffix table */
|
|
||||||
+int numpfx; /* Number of prefixes in table */
|
|
||||||
+int numsfx; /* Number of suffixes in table */
|
|
||||||
+
|
|
||||||
+/* the prefix table */
|
|
||||||
+struct affixptr ptable[MAX_PREFIXES];
|
|
||||||
+
|
|
||||||
+/* the suffix table */
|
|
||||||
+struct affixptr stable[MAX_SUFFIXES];
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+int numwords; /* number of words found */
|
|
||||||
+struct dwords
|
|
||||||
+{
|
|
||||||
+ char * word;
|
|
||||||
+ int pallow;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct dwords wlist[MAX_WORDS]; /* list words found */
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+/* the routines */
|
|
||||||
+
|
|
||||||
+int parse_aff_file(FILE* afflst);
|
|
||||||
+
|
|
||||||
+void encodeit(struct affent * ptr, char * cs);
|
|
||||||
+
|
|
||||||
+int expand_rootword(const char *, int, const char*, int);
|
|
||||||
+
|
|
||||||
+void pfx_add (const char * word, int len, struct affent* ep, int num);
|
|
||||||
+
|
|
||||||
+void suf_add (const char * word, int len, struct affent * ep, int num);
|
|
||||||
+
|
|
||||||
+char * mystrsep(char ** stringp, const char delim);
|
|
||||||
+
|
|
||||||
+char * mystrdup(const char * s);
|
|
||||||
+
|
|
||||||
+void mychomp(char * s);
|
|
@ -1,6 +1,101 @@
|
|||||||
|
diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx hunspell-1.1.5/src/hunspell/affixmgr.cxx
|
||||||
|
--- hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx 2007-03-12 11:38:23.000000000 +0000
|
||||||
|
+++ hunspell-1.1.5/src/hunspell/affixmgr.cxx 2007-05-22 12:35:27.000000000 +0100
|
||||||
|
@@ -19,6 +19,8 @@
|
||||||
|
|
||||||
|
#include "csutil.hxx"
|
||||||
|
|
||||||
|
+#include <iconv.h>
|
||||||
|
+
|
||||||
|
#ifndef MOZILLA_CLIENT
|
||||||
|
#ifndef W32
|
||||||
|
using namespace std;
|
||||||
|
@@ -230,6 +232,61 @@
|
||||||
|
checknum=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+char *changeenc(iconv_t cd, char *token)
|
||||||
|
+{
|
||||||
|
+ if (!cd || !token) return token;
|
||||||
|
+
|
||||||
|
+ iconv (cd, NULL, NULL, NULL, NULL);
|
||||||
|
+
|
||||||
|
+ char *inptr = token;
|
||||||
|
+ size_t insize = strlen(token);
|
||||||
|
+
|
||||||
|
+ size_t avail = (insize + 1) * 3;
|
||||||
|
+ char *wrptr = (char*)malloc(avail);
|
||||||
|
+ char *output = wrptr;
|
||||||
|
+
|
||||||
|
+ memset(wrptr, 0, avail);
|
||||||
|
+ size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail);
|
||||||
|
+ free(token);
|
||||||
|
+
|
||||||
|
+ if (nconv == (size_t) -1)
|
||||||
|
+ {
|
||||||
|
+ free(output);
|
||||||
|
+ output = NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return output;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void AffixMgr::fill_unicode_equivalents()
|
||||||
|
+{
|
||||||
|
+ if ((!ignorechars_utf16_len || !wordchars_utf16_len) && (ignorechars || wordchars))
|
||||||
|
+ {
|
||||||
|
+ iconv_t to_utf16 = iconv_open("UCS2", encoding);
|
||||||
|
+ if (to_utf16 == 0 || to_utf16 == (iconv_t) -1)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ unsigned short *p;
|
||||||
|
+
|
||||||
|
+ if (!ignorechars_utf16_len && ignorechars && strlen(ignorechars))
|
||||||
|
+ {
|
||||||
|
+ char *tmp = mystrdup(ignorechars);
|
||||||
|
+ ignorechars_utf16 = (unsigned short*)changeenc(to_utf16, tmp);
|
||||||
|
+ for (p = ignorechars_utf16; *p; ++p);
|
||||||
|
+ ignorechars_utf16_len = p - ignorechars_utf16;
|
||||||
|
+ flag_qsort(ignorechars_utf16, 0, ignorechars_utf16_len);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!wordchars_utf16_len && wordchars && strlen(wordchars))
|
||||||
|
+ {
|
||||||
|
+ char *tmp = mystrdup(wordchars);
|
||||||
|
+ wordchars_utf16 = (unsigned short*)changeenc(to_utf16, tmp);
|
||||||
|
+ for (p = wordchars_utf16; *p; ++p);
|
||||||
|
+ wordchars_utf16_len = p - wordchars_utf16;
|
||||||
|
+ flag_qsort(wordchars_utf16, 0, wordchars_utf16_len);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
|
||||||
|
// read in aff file and build up prefix and suffix entry objects
|
||||||
|
int AffixMgr::parse_file(const char * affpath)
|
||||||
|
@@ -659,6 +716,9 @@
|
||||||
|
breaktable[0] = mystrdup("-");
|
||||||
|
numbreak = 1;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ fill_unicode_equivalents();
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx hunspell-1.1.5/src/hunspell/affixmgr.hxx
|
||||||
|
--- hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx 2007-03-12 11:42:52.000000000 +0000
|
||||||
|
+++ hunspell-1.1.5/src/hunspell/affixmgr.hxx 2007-05-22 12:00:14.000000000 +0100
|
||||||
|
@@ -203,6 +203,7 @@
|
||||||
|
int process_pfx_tree_to_list();
|
||||||
|
int process_sfx_tree_to_list();
|
||||||
|
int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
|
||||||
|
+ void fill_unicode_equivalents();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx
|
diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx
|
||||||
--- hunspell-1.1.5.orig/src/hunspell/csutil.cxx 2007-03-12 12:01:56.000000000 +0000
|
--- hunspell-1.1.5.orig/src/hunspell/csutil.cxx 2007-03-12 12:01:56.000000000 +0000
|
||||||
+++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-05-21 13:31:46.000000000 +0100
|
+++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-05-22 12:21:30.000000000 +0100
|
||||||
@@ -5090,6 +5090,10 @@
|
@@ -5090,6 +5090,10 @@
|
||||||
#ifndef OPENOFFICEORG
|
#ifndef OPENOFFICEORG
|
||||||
#ifndef MOZILLA_CLIENT
|
#ifndef MOZILLA_CLIENT
|
||||||
@ -14,7 +109,7 @@ diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell
|
|||||||
int j;
|
int j;
|
||||||
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx
|
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx
|
||||||
--- hunspell-1.1.5.orig/src/parsers/textparser.cxx 2007-03-05 12:59:53.000000000 +0000
|
--- hunspell-1.1.5.orig/src/parsers/textparser.cxx 2007-03-05 12:59:53.000000000 +0000
|
||||||
+++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-05-21 13:31:46.000000000 +0100
|
+++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-05-22 12:02:24.000000000 +0100
|
||||||
@@ -5,6 +5,7 @@
|
@@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
#include "../hunspell/csutil.hxx"
|
||||||
@ -23,7 +118,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
|
|||||||
|
|
||||||
#ifndef W32
|
#ifndef W32
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@@ -46,22 +47,76 @@
|
@@ -46,22 +47,52 @@
|
||||||
|
|
||||||
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
|
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
|
||||||
|
|
||||||
@ -33,31 +128,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
|
|||||||
}
|
}
|
||||||
|
|
||||||
-TextParser::TextParser(const char * wordchars)
|
-TextParser::TextParser(const char * wordchars)
|
||||||
+static char *changeenc(iconv_t cd, char *token)
|
+extern char *changeenc(iconv_t cd, char *token);
|
||||||
+{
|
|
||||||
+ if (!cd || !token) return token;
|
|
||||||
+
|
|
||||||
+ iconv (cd, NULL, NULL, NULL, NULL);
|
|
||||||
+
|
|
||||||
+ char *inptr = token;
|
|
||||||
+ size_t insize = strlen(token);
|
|
||||||
+
|
|
||||||
+ size_t avail = (insize + 1) * 3;
|
|
||||||
+ char *wrptr = (char*)malloc(avail);
|
|
||||||
+ char *output = wrptr;
|
|
||||||
+
|
|
||||||
+ memset(wrptr, 0, avail);
|
|
||||||
+ size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail);
|
|
||||||
+ free(token);
|
|
||||||
+
|
|
||||||
+ if (nconv == (size_t) -1)
|
|
||||||
+ {
|
|
||||||
+ free(output);
|
|
||||||
+ output = NULL;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return output;
|
|
||||||
+}
|
|
||||||
+
|
+
|
||||||
+char *TextParser::todictenc(char *token)
|
+char *TextParser::todictenc(char *token)
|
||||||
+{
|
+{
|
||||||
@ -105,7 +176,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
|
|||||||
int TextParser::is_wordchar(char * w)
|
int TextParser::is_wordchar(char * w)
|
||||||
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx
|
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx
|
||||||
--- hunspell-1.1.5.orig/src/parsers/textparser.hxx 2007-01-19 01:01:07.000000000 +0000
|
--- hunspell-1.1.5.orig/src/parsers/textparser.hxx 2007-01-19 01:01:07.000000000 +0000
|
||||||
+++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-05-21 13:31:46.000000000 +0100
|
+++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-05-22 11:29:52.000000000 +0100
|
||||||
@@ -20,12 +20,15 @@
|
@@ -20,12 +20,15 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -152,7 +223,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parse
|
|||||||
#endif
|
#endif
|
||||||
diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx
|
diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx
|
||||||
--- hunspell-1.1.5.orig/src/tools/hunspell.cxx 2007-05-21 13:31:55.000000000 +0100
|
--- hunspell-1.1.5.orig/src/tools/hunspell.cxx 2007-05-21 13:31:55.000000000 +0100
|
||||||
+++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-05-21 13:31:46.000000000 +0100
|
+++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-05-22 11:15:33.000000000 +0100
|
||||||
@@ -89,6 +89,8 @@
|
@@ -89,6 +89,8 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
Name: hunspell
|
Name: hunspell
|
||||||
Summary: Hunspell is a spell checker and morphological analyzer library
|
Summary: Hunspell is a spell checker and morphological analyzer library
|
||||||
Version: 1.1.5.3
|
Version: 1.1.5.3
|
||||||
Release: 2%{?dist}
|
Release: 3%{?dist}
|
||||||
Source: http://downloads.sourceforge.net/%{name}/hunspell-1.1.5-3.tar.gz
|
Source: http://downloads.sourceforge.net/%{name}/hunspell-1.1.5-3.tar.gz
|
||||||
Group: System Environment/Libraries
|
Group: System Environment/Libraries
|
||||||
URL: http://hunspell.sourceforge.net/
|
URL: http://hunspell.sourceforge.net/
|
||||||
@ -82,6 +82,10 @@ rm -rf $RPM_BUILD_ROOT
|
|||||||
%{_libdir}/pkgconfig/hunspell.pc
|
%{_libdir}/pkgconfig/hunspell.pc
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue May 22 2007 Caolan McNamara <caolanm@redhat.com> - 1.1.5.3-3
|
||||||
|
- Resolves: rhbz#240696 extend encoding patch to promote and add
|
||||||
|
dictionary 8bit WORDCHARS to the ucs-2 word char list
|
||||||
|
|
||||||
* Mon May 21 2007 Caolan McNamara <caolanm@redhat.com> - 1.1.5.3-2
|
* Mon May 21 2007 Caolan McNamara <caolanm@redhat.com> - 1.1.5.3-2
|
||||||
- Resolves: rhbz#240696 add hunspell-1.1.5.encoding.patch
|
- Resolves: rhbz#240696 add hunspell-1.1.5.encoding.patch
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user