From 0a1b5e574277412ef8e938c776d557c84b7c1d22 Mon Sep 17 00:00:00 2001 From: Caolan McNamara Date: Tue, 22 May 2007 11:42:41 +0000 Subject: [PATCH] add 8bit dictionary word chars to the unicode list --- hunspell-1.1.5-missingheaders.patch | 203 ---------------------------- hunspell-1.1.5.encoding.patch | 131 ++++++++++++++---- hunspell.spec | 6 +- 3 files changed, 106 insertions(+), 234 deletions(-) delete mode 100644 hunspell-1.1.5-missingheaders.patch diff --git a/hunspell-1.1.5-missingheaders.patch b/hunspell-1.1.5-missingheaders.patch deleted file mode 100644 index 85685ba..0000000 --- a/hunspell-1.1.5-missingheaders.patch +++ /dev/null @@ -1,203 +0,0 @@ ---- /dev/null 2007-03-16 11:16:34.774869017 +0000 -+++ hunspell-1.1.5/src/tools/munch.h 2007-03-20 09:19:49.000000000 +0000 -@@ -0,0 +1,121 @@ -+/* munch header file */ -+ -+#define MAX_LN_LEN 200 -+#define MAX_WD_LEN 200 -+#define MAX_PREFIXES 256 -+#define MAX_SUFFIXES 256 -+#define MAX_ROOTS 20 -+#define MAX_WORDS 5000 -+ -+#define ROTATE_LEN 5 -+ -+#define ROTATE(v,q) \ -+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1)); -+ -+#define SET_SIZE 256 -+ -+#define XPRODUCT (1 << 0) -+ -+/* the affix table entry */ -+ -+struct affent -+{ -+ char * appnd; -+ char * strip; -+ short appndl; -+ short stripl; -+ char achar; -+ char xpflg; -+ short numconds; -+ char conds[SET_SIZE]; -+}; -+ -+ -+struct affixptr -+{ -+ struct affent * aep; -+ int num; -+}; -+ -+/* the prefix and suffix table */ -+int numpfx; /* Number of prefixes in table */ -+int numsfx; /* Number of suffixes in table */ -+ -+/* the prefix table */ -+struct affixptr ptable[MAX_PREFIXES]; -+ -+/* the suffix table */ -+struct affixptr stable[MAX_SUFFIXES]; -+ -+ -+/* data structure to store results of lookups */ -+struct matches -+{ -+ struct hentry * hashent; /* hash table entry */ -+ struct affent * prefix; /* Prefix used, or NULL */ -+ struct affent * suffix; /* Suffix used, or NULL */ -+}; -+ -+int numroots; /* number of root words found */ -+struct matches roots[MAX_ROOTS]; /* list of root words found */ -+ -+/* hashing stuff */ -+ -+struct hentry -+{ -+ char * word; -+ char * affstr; -+ struct hentry * next; -+ int keep; -+}; -+ -+ -+int tablesize; -+struct hentry * tableptr; -+ -+/* unmunch stuff */ -+ -+int numwords; /* number of words found */ -+struct dwords -+{ -+ char * word; -+ int pallow; -+}; -+ -+struct dwords wlist[MAX_WORDS]; /* list words found */ -+ -+ -+/* the routines */ -+ -+int parse_aff_file(FILE* afflst); -+ -+void encodeit(struct affent * ptr, char * cs); -+ -+int load_tables(FILE * wrdlst); -+ -+int hash(const char *); -+ -+int add_word(char *); -+ -+struct hentry * lookup(const char *); -+ -+void aff_chk (const char * word, int len); -+ -+void pfx_chk (const char * word, int len, struct affent* ep, int num); -+ -+void suf_chk (const char * word, int len, struct affent * ep, int num, -+ struct affent * pfxent, int cpflag); -+ -+void add_affix_char(struct hentry * hent, char ac); -+ -+int expand_rootword(const char *, int, const char*, int); -+ -+void pfx_add (const char * word, int len, struct affent* ep, int num); -+ -+void suf_add (const char * word, int len, struct affent * ep, int num); -+ -+char * mystrsep(char ** stringp, const char delim); -+ -+char * mystrdup(const char * s); -+ -+void mychomp(char * s); ---- /dev/null 2007-03-16 11:16:34.774869017 +0000 -+++ hunspell-1.1.5/src/tools/unmunch.h 2007-03-20 09:19:50.000000000 +0000 -@@ -0,0 +1,76 @@ -+/* unmunch header file */ -+ -+#define MAX_LN_LEN 200 -+#define MAX_WD_LEN 200 -+#define MAX_PREFIXES 256 -+#define MAX_SUFFIXES 256 -+#define MAX_WORDS 5000 -+ -+#define ROTATE_LEN 5 -+ -+#define ROTATE(v,q) \ -+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1)); -+ -+#define SET_SIZE 256 -+ -+#define XPRODUCT (1 << 0) -+ -+/* the affix table entry */ -+ -+struct affent -+{ -+ char * appnd; -+ char * strip; -+ short appndl; -+ short stripl; -+ char achar; -+ char xpflg; -+ short numconds; -+ char conds[SET_SIZE]; -+}; -+ -+ -+struct affixptr -+{ -+ struct affent * aep; -+ int num; -+}; -+ -+/* the prefix and suffix table */ -+int numpfx; /* Number of prefixes in table */ -+int numsfx; /* Number of suffixes in table */ -+ -+/* the prefix table */ -+struct affixptr ptable[MAX_PREFIXES]; -+ -+/* the suffix table */ -+struct affixptr stable[MAX_SUFFIXES]; -+ -+ -+int numwords; /* number of words found */ -+struct dwords -+{ -+ char * word; -+ int pallow; -+}; -+ -+struct dwords wlist[MAX_WORDS]; /* list words found */ -+ -+ -+/* the routines */ -+ -+int parse_aff_file(FILE* afflst); -+ -+void encodeit(struct affent * ptr, char * cs); -+ -+int expand_rootword(const char *, int, const char*, int); -+ -+void pfx_add (const char * word, int len, struct affent* ep, int num); -+ -+void suf_add (const char * word, int len, struct affent * ep, int num); -+ -+char * mystrsep(char ** stringp, const char delim); -+ -+char * mystrdup(const char * s); -+ -+void mychomp(char * s); diff --git a/hunspell-1.1.5.encoding.patch b/hunspell-1.1.5.encoding.patch index 44dc772..c8d7166 100644 --- a/hunspell-1.1.5.encoding.patch +++ b/hunspell-1.1.5.encoding.patch @@ -1,6 +1,101 @@ +diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx hunspell-1.1.5/src/hunspell/affixmgr.cxx +--- hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx 2007-03-12 11:38:23.000000000 +0000 ++++ hunspell-1.1.5/src/hunspell/affixmgr.cxx 2007-05-22 12:35:27.000000000 +0100 +@@ -19,6 +19,8 @@ + + #include "csutil.hxx" + ++#include ++ + #ifndef MOZILLA_CLIENT + #ifndef W32 + using namespace std; +@@ -230,6 +232,61 @@ + checknum=0; + } + ++char *changeenc(iconv_t cd, char *token) ++{ ++ if (!cd || !token) return token; ++ ++ iconv (cd, NULL, NULL, NULL, NULL); ++ ++ char *inptr = token; ++ size_t insize = strlen(token); ++ ++ size_t avail = (insize + 1) * 3; ++ char *wrptr = (char*)malloc(avail); ++ char *output = wrptr; ++ ++ memset(wrptr, 0, avail); ++ size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail); ++ free(token); ++ ++ if (nconv == (size_t) -1) ++ { ++ free(output); ++ output = NULL; ++ } ++ ++ return output; ++} ++ ++void AffixMgr::fill_unicode_equivalents() ++{ ++ if ((!ignorechars_utf16_len || !wordchars_utf16_len) && (ignorechars || wordchars)) ++ { ++ iconv_t to_utf16 = iconv_open("UCS2", encoding); ++ if (to_utf16 == 0 || to_utf16 == (iconv_t) -1) ++ return; ++ ++ unsigned short *p; ++ ++ if (!ignorechars_utf16_len && ignorechars && strlen(ignorechars)) ++ { ++ char *tmp = mystrdup(ignorechars); ++ ignorechars_utf16 = (unsigned short*)changeenc(to_utf16, tmp); ++ for (p = ignorechars_utf16; *p; ++p); ++ ignorechars_utf16_len = p - ignorechars_utf16; ++ flag_qsort(ignorechars_utf16, 0, ignorechars_utf16_len); ++ } ++ ++ if (!wordchars_utf16_len && wordchars && strlen(wordchars)) ++ { ++ char *tmp = mystrdup(wordchars); ++ wordchars_utf16 = (unsigned short*)changeenc(to_utf16, tmp); ++ for (p = wordchars_utf16; *p; ++p); ++ wordchars_utf16_len = p - wordchars_utf16; ++ flag_qsort(wordchars_utf16, 0, wordchars_utf16_len); ++ } ++ } ++} + + // read in aff file and build up prefix and suffix entry objects + int AffixMgr::parse_file(const char * affpath) +@@ -659,6 +716,9 @@ + breaktable[0] = mystrdup("-"); + numbreak = 1; + } ++ ++ fill_unicode_equivalents(); ++ + return 0; + } + +diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx hunspell-1.1.5/src/hunspell/affixmgr.hxx +--- hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx 2007-03-12 11:42:52.000000000 +0000 ++++ hunspell-1.1.5/src/hunspell/affixmgr.hxx 2007-05-22 12:00:14.000000000 +0100 +@@ -203,6 +203,7 @@ + int process_pfx_tree_to_list(); + int process_sfx_tree_to_list(); + int redundant_condition(char, char * strip, int stripl, const char * cond, char *); ++ void fill_unicode_equivalents(); + }; + + #endif diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx --- hunspell-1.1.5.orig/src/hunspell/csutil.cxx 2007-03-12 12:01:56.000000000 +0000 -+++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-05-21 13:31:46.000000000 +0100 ++++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-05-22 12:21:30.000000000 +0100 @@ -5090,6 +5090,10 @@ #ifndef OPENOFFICEORG #ifndef MOZILLA_CLIENT @@ -14,7 +109,7 @@ diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell int j; diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx --- hunspell-1.1.5.orig/src/parsers/textparser.cxx 2007-03-05 12:59:53.000000000 +0000 -+++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-05-21 13:31:46.000000000 +0100 ++++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-05-22 12:02:24.000000000 +0100 @@ -5,6 +5,7 @@ #include "../hunspell/csutil.hxx" @@ -23,7 +118,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse #ifndef W32 using namespace std; -@@ -46,22 +47,76 @@ +@@ -46,22 +47,52 @@ #define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *)) @@ -33,31 +128,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse } -TextParser::TextParser(const char * wordchars) -+static char *changeenc(iconv_t cd, char *token) -+{ -+ if (!cd || !token) return token; -+ -+ iconv (cd, NULL, NULL, NULL, NULL); -+ -+ char *inptr = token; -+ size_t insize = strlen(token); -+ -+ size_t avail = (insize + 1) * 3; -+ char *wrptr = (char*)malloc(avail); -+ char *output = wrptr; -+ -+ memset(wrptr, 0, avail); -+ size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail); -+ free(token); -+ -+ if (nconv == (size_t) -1) -+ { -+ free(output); -+ output = NULL; -+ } -+ -+ return output; -+} ++extern char *changeenc(iconv_t cd, char *token); + +char *TextParser::todictenc(char *token) +{ @@ -105,7 +176,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse int TextParser::is_wordchar(char * w) diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx --- hunspell-1.1.5.orig/src/parsers/textparser.hxx 2007-01-19 01:01:07.000000000 +0000 -+++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-05-21 13:31:46.000000000 +0100 ++++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-05-22 11:29:52.000000000 +0100 @@ -20,12 +20,15 @@ * */ @@ -152,7 +223,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parse #endif diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx --- hunspell-1.1.5.orig/src/tools/hunspell.cxx 2007-05-21 13:31:55.000000000 +0100 -+++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-05-21 13:31:46.000000000 +0100 ++++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-05-22 11:15:33.000000000 +0100 @@ -89,6 +89,8 @@ #endif #endif diff --git a/hunspell.spec b/hunspell.spec index f9675da..ede9db8 100644 --- a/hunspell.spec +++ b/hunspell.spec @@ -1,7 +1,7 @@ Name: hunspell Summary: Hunspell is a spell checker and morphological analyzer library Version: 1.1.5.3 -Release: 2%{?dist} +Release: 3%{?dist} Source: http://downloads.sourceforge.net/%{name}/hunspell-1.1.5-3.tar.gz Group: System Environment/Libraries URL: http://hunspell.sourceforge.net/ @@ -82,6 +82,10 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/pkgconfig/hunspell.pc %changelog +* Tue May 22 2007 Caolan McNamara - 1.1.5.3-3 +- Resolves: rhbz#240696 extend encoding patch to promote and add + dictionary 8bit WORDCHARS to the ucs-2 word char list + * Mon May 21 2007 Caolan McNamara - 1.1.5.3-2 - Resolves: rhbz#240696 add hunspell-1.1.5.encoding.patch