diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx hunspell-1.1.5/src/hunspell/affixmgr.cxx --- hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/hunspell/affixmgr.cxx 2007-06-06 13:05:35.000000000 +0100 @@ -19,6 +19,8 @@ #include "csutil.hxx" +#include <iconv.h> + #ifndef MOZILLA_CLIENT #ifndef W32 using namespace std; @@ -230,6 +232,63 @@ checknum=0; } +char *changeenc(iconv_t cd, char *token) +{ + if (!cd || !token) return token; + + iconv (cd, NULL, NULL, NULL, NULL); + + char *inptr = token; + size_t insize = strlen(token); + + size_t avail = (insize + 1) * 3; + char *wrptr = (char*)malloc(avail); + char *output = wrptr; + + memset(wrptr, 0, avail); + size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail); + free(token); + + if (nconv == (size_t) -1) + { + free(output); + output = NULL; + } + + return output; +} + +void AffixMgr::fill_unicode_equivalents() +{ + if ((!ignorechars_utf16_len || !wordchars_utf16_len) && (ignorechars || wordchars)) + { + iconv_t to_utf16 = iconv_open("UCS2", encoding); + if (to_utf16 == 0 || to_utf16 == (iconv_t) -1) + return; + + unsigned short *p; + + if (!ignorechars_utf16_len && ignorechars && strlen(ignorechars)) + { + char *tmp = mystrdup(ignorechars); + ignorechars_utf16 = (unsigned short*)changeenc(to_utf16, tmp); + for (p = ignorechars_utf16; p && *p; ++p); + ignorechars_utf16_len = p - ignorechars_utf16; + flag_qsort(ignorechars_utf16, 0, ignorechars_utf16_len); + } + + if (!wordchars_utf16_len && wordchars && strlen(wordchars)) + { + char *tmp = mystrdup(wordchars); + wordchars_utf16 = (unsigned short*)changeenc(to_utf16, tmp); + for (p = wordchars_utf16; p && *p; ++p); + wordchars_utf16_len = p - wordchars_utf16; + flag_qsort(wordchars_utf16, 0, wordchars_utf16_len); + } + + iconv_close(to_utf16); + } +} // read in aff file and build up prefix and suffix entry objects int AffixMgr::parse_file(const char * affpath) @@ -633,6 +692,7 @@ char * enc = get_encoding(); 
csconv = get_current_cs(enc); + bool bUTF8 = strcasecmp(enc, "UTF-8") == 0; free(enc); enc = NULL; @@ -642,7 +702,7 @@ free(wordchars); } else *expw = '\0'; - for (int i = 0; i <= 255; i++) { + for (int i = 0; i <= (bUTF8 ? 127 : 255); i++) { if ( (csconv[i].cupper != csconv[i].clower) && (! strchr(expw, (char) i))) { *(expw + strlen(expw) + 1) = '\0'; @@ -659,6 +719,9 @@ breaktable[0] = mystrdup("-"); numbreak = 1; } + + fill_unicode_equivalents(); + return 0; } Only in hunspell-1.1.5/src/hunspell: affixmgr.cxx.orig Only in hunspell-1.1.5/src/hunspell: affixmgr.cxx.rej diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx hunspell-1.1.5/src/hunspell/affixmgr.hxx --- hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/hunspell/affixmgr.hxx 2007-06-06 13:05:02.000000000 +0100 @@ -203,6 +203,7 @@ int process_pfx_tree_to_list(); int process_sfx_tree_to_list(); int redundant_condition(char, char * strip, int stripl, const char * cond, char *); + void fill_unicode_equivalents(); }; #endif diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx --- hunspell-1.1.5.orig/src/hunspell/csutil.cxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-06-06 13:05:02.000000000 +0100 @@ -5090,6 +5090,10 @@ #ifndef OPENOFFICEORG #ifndef MOZILLA_CLIENT int initialize_utf_tbl() { + + if (utf_tbl) + return 0; + utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2)); if (utf_tbl) { int j; diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx --- hunspell-1.1.5.orig/src/parsers/textparser.cxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-06-06 13:05:02.000000000 +0100 @@ -5,6 +5,7 @@ #include "../hunspell/csutil.hxx" #include "textparser.hxx" +#include <langinfo.h> #ifndef W32 using namespace std; @@ -46,22 +47,52 @@ #define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *)) 
-TextParser::TextParser() { +TextParser::TextParser() : todict(NULL), fromdict(NULL) { init((char *) NULL); } -TextParser::TextParser(const char * wordchars) +extern char *changeenc(iconv_t cd, char *token); + +char *TextParser::todictenc(char *token) +{ + return changeenc(todict, token); +} + +char *TextParser::fromdictenc(char *token) +{ + return changeenc(fromdict, token); +} + +void TextParser::openiconv(const char *dictenc) +{ + todict = fromdict = NULL; + const char *srcenc = nl_langinfo(CODESET); + + if (dictenc && srcenc && (strcmp(dictenc, srcenc) != 0)) + { + if ((todict = iconv_open(dictenc, srcenc)) == (iconv_t) -1) + todict = NULL; + if ((fromdict = iconv_open(srcenc, dictenc)) == (iconv_t) -1) + fromdict = NULL; + } +} + +TextParser::TextParser(const char * wordchars, const char *destenc) { init(wordchars); + openiconv(destenc); } -TextParser::TextParser(unsigned short * wordchars, int len) +TextParser::TextParser(unsigned short * wordchars, int len, const char *destenc) { init(wordchars, len); + openiconv(destenc); } TextParser::~TextParser() { + if (todict) iconv_close(todict); + if (fromdict) iconv_close(fromdict); } int TextParser::is_wordchar(char * w) diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx --- hunspell-1.1.5.orig/src/parsers/textparser.hxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-06-06 13:05:02.000000000 +0100 @@ -20,12 +20,15 @@ * */ +#include <iconv.h> + class TextParser { protected: void init(const char *); void init(unsigned short * wordchars, int len); + void openiconv(const char *dictenc); int wordcharacters[256]; // for detection of the word boundaries char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines int actual; // actual line @@ -36,12 +39,14 @@ int next_char(char * line, int * pos); unsigned short * wordchars_utf16; int wclen; + iconv_t todict; + iconv_t fromdict; public: TextParser(); - TextParser(unsigned short * wordchars, int 
len); - TextParser(const char * wc); + TextParser(unsigned short * wordchars, int len, const char *destenc); + TextParser(const char * wc, const char *destenc); virtual ~TextParser(); void put_line(char * line); @@ -53,7 +58,9 @@ int is_wordchar(char * w); char * get_latin1(char * s); char * next_char(); - + + char * todictenc(char * in); + char * fromdictenc(char * in); }; #endif diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx --- hunspell-1.1.5.orig/src/tools/hunspell.cxx 2007-06-06 13:04:45.000000000 +0100 +++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-06-06 13:05:02.000000000 +0100 @@ -89,6 +89,8 @@ #endif #endif +#include <langinfo.h> + #define TEMPNAME "hunSPELL.bak" extern char * mystrdup(const char * s); @@ -130,9 +132,11 @@ TextParser * newParser(char * wordchars, int format, char * extension, Hunspell * pMS) { TextParser * p = NULL; - int utf8 = (strcmp(pMS->get_dic_encoding(), "UTF-8") == 0); + int utf8 = (strcmp(nl_langinfo(CODESET), "UTF-8") == 0); if (utf8) { + if (initialize_utf_tbl()) return NULL; + switch (format) { case FMT_LATEX: p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len); break; case FMT_HTML: p = new HTMLParser(wordchars_utf16, wordchars_utf16_len); break; @@ -173,9 +177,9 @@ } if (!p) { if (utf8) { - p = new TextParser(wordchars_utf16, wordchars_utf16_len); + p = new TextParser(wordchars_utf16, wordchars_utf16_len, pMS->get_dic_encoding()); } else { - p = new TextParser(wordchars); + p = new TextParser(wordchars, pMS->get_dic_encoding()); } } return p; @@ -260,7 +264,7 @@ w = w->next; free(r); } - fclose(dic); + return 0 == fclose(dic); } char * basename(char * s, char c) { @@ -373,13 +377,15 @@ if (pos >= 0) { parser->put_line(buf + pos); - while ((token = parser->next_token())) { + while ((token = parser->todictenc(parser->next_token()))) { switch (filter_mode) { case BADWORD: { - if (! 
pMS->spell(token)) { + int nRes = pMS->spell(token); + token = parser->fromdictenc(token); + if (!nRes) { bad = 1; - if (! printgood) fprintf(stdout,"%s\n", token); + if (!printgood) fprintf(stdout,"%s\n", token); } else { if (printgood) fprintf(stdout,"%s\n", token); } @@ -414,6 +420,8 @@ char ** wlst = NULL; bad = 1; int ns = pMS->suggest_auto(&wlst, token); + for (int j = 0; j < ns; j++) + wlst[j] = parser->fromdictenc(wlst[j]); if (ns > 0) { parser->change_token(wlst[0]); if (filter_mode != AUTO2) { @@ -451,6 +459,8 @@ } else { char ** wlst = NULL; int ns = pMS->suggest(&wlst, token); + for (int j = 0; j < ns; j++) + wlst[j] = parser->fromdictenc(wlst[j]); if (ns == 0) { fprintf(stdout,"# %s %d", token, parser->get_tokenpos() + pos); } else { @@ -554,6 +564,9 @@ getmaxyx(stdscr,y,x); clear(); + token = mystrdup(token); + token = parser->fromdictenc(token); + if (forbidden) printw(gettext("FORBIDDEN!")); printw(gettext("\t%s\t\tFile: %s\n\n"), token,filename); @@ -899,11 +912,13 @@ char * token; int dialogexit = 0; int info; - while ((token=parser->next_token())) { + while ((token=parser->todictenc(parser->next_token()))) { if (! pMS->spell(token, &info, NULL)) { dialogscreen(parser, token, filename, (info & SPELL_FORBIDDEN), NULL, 0); // preview char ** wlst = NULL; int ns = pMS->suggest(&wlst,token); + for (int j = 0; j < ns; j++) + wlst[j] = parser->fromdictenc(wlst[j]); if (ns==0) { dialogexit = dialog(parser, pMS, token, filename, wlst, ns, (info & SPELL_FORBIDDEN)); } else {