353 lines
10 KiB
Diff
353 lines
10 KiB
Diff
diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx hunspell-1.1.5/src/hunspell/affixmgr.cxx
|
|
--- hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/hunspell/affixmgr.cxx 2007-06-06 13:05:35.000000000 +0100
|
|
@@ -19,6 +19,8 @@
|
|
|
|
#include "csutil.hxx"
|
|
|
|
+#include <iconv.h>
|
|
+
|
|
#ifndef MOZILLA_CLIENT
|
|
#ifndef W32
|
|
using namespace std;
|
|
@@ -230,6 +232,63 @@
|
|
checknum=0;
|
|
}
|
|
|
|
+/**
+ * Convert a heap-allocated, NUL-terminated string to another encoding.
+ *
+ * @param cd     iconv conversion descriptor; a null descriptor means that
+ *               no conversion is wanted.
+ * @param token  malloc()ed input string. Ownership passes to this function
+ *               whenever a conversion is attempted.
+ * @return       token itself, untouched, when cd or token is null; otherwise
+ *               a newly allocated, zero-terminated buffer with the converted
+ *               text, or NULL when the allocation or the conversion fails.
+ *               In every case where a conversion was attempted token has
+ *               been freed and must not be used again by the caller.
+ */
+char *changeenc(iconv_t cd, char *token)
+{
+    if (!cd || !token) return token;
+
+    // Reset the descriptor to its initial shift state before each string.
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    char *inptr = token;
+    size_t insize = strlen(token);
+
+    // Budget three output bytes per input byte. Four extra zero bytes are
+    // kept out of iconv's reach so the result is always terminated, even
+    // when the target encoding uses 2- or 4-byte code units.
+    size_t avail = (insize + 1) * 3;
+    char *output = (char *)calloc(avail + 4, 1);
+    if (!output)
+    {
+        // Keep the ownership contract: the input is consumed on failure too.
+        free(token);
+        return NULL;
+    }
+    char *wrptr = output;
+
+    size_t nconv = iconv(cd, &inptr, &insize, &wrptr, &avail);
+    free(token);
+
+    if (nconv == (size_t) -1)
+    {
+        free(output);
+        output = NULL;
+    }
+
+    return output;
+}
|
|
+
|
|
+// Derive the UTF-16 versions of ignorechars and wordchars from their 8-bit
+// originals. A set is converted (iconv, from `encoding` to "UCS2") only while
+// its UTF-16 length is still zero and its 8-bit string is non-empty; the
+// result is measured up to the first zero unit and passed to flag_qsort.
+// Nothing happens when the converter cannot be opened.
+void AffixMgr::fill_unicode_equivalents()
+{
+    // Both UTF-16 sets already filled: nothing to do.
+    if (ignorechars_utf16_len && wordchars_utf16_len) return;
+    // No 8-bit source to convert from.
+    if (!ignorechars && !wordchars) return;
+
+    iconv_t conv = iconv_open("UCS2", encoding);
+    if (conv == (iconv_t) -1 || conv == 0) return;
+
+    if (ignorechars && *ignorechars && !ignorechars_utf16_len)
+    {
+        // changeenc() takes ownership of the duplicate.
+        ignorechars_utf16 = (unsigned short*)changeenc(conv, mystrdup(ignorechars));
+        unsigned short *end = ignorechars_utf16;
+        while (end && *end) ++end;
+        ignorechars_utf16_len = end - ignorechars_utf16;
+        flag_qsort(ignorechars_utf16, 0, ignorechars_utf16_len);
+    }
+
+    if (wordchars && *wordchars && !wordchars_utf16_len)
+    {
+        // changeenc() takes ownership of the duplicate.
+        wordchars_utf16 = (unsigned short*)changeenc(conv, mystrdup(wordchars));
+        unsigned short *end = wordchars_utf16;
+        while (end && *end) ++end;
+        wordchars_utf16_len = end - wordchars_utf16;
+        flag_qsort(wordchars_utf16, 0, wordchars_utf16_len);
+    }
+
+    iconv_close(conv);
+}
|
|
|
|
// read in aff file and build up prefix and suffix entry objects
|
|
int AffixMgr::parse_file(const char * affpath)
|
|
@@ -633,6 +692,7 @@
 
     char * enc = get_encoding();
     csconv = get_current_cs(enc);
+    bool bUTF8 = strcasecmp(enc, "UTF-8") == 0; // true only for a UTF-8 dictionary
     free(enc);
     enc = NULL;
 
@@ -642,7 +702,7 @@
|
|
free(wordchars);
|
|
} else *expw = '\0';
|
|
|
|
- for (int i = 0; i <= 255; i++) {
|
|
+ for (int i = 0; i <= (bUTF8 ? 127 : 255); i++) {
|
|
if ( (csconv[i].cupper != csconv[i].clower) &&
|
|
(! strchr(expw, (char) i))) {
|
|
*(expw + strlen(expw) + 1) = '\0';
|
|
@@ -659,6 +719,9 @@
|
|
breaktable[0] = mystrdup("-");
|
|
numbreak = 1;
|
|
}
|
|
+
|
|
+ fill_unicode_equivalents();
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
Only in hunspell-1.1.5/src/hunspell: affixmgr.cxx.orig
|
|
Only in hunspell-1.1.5/src/hunspell: affixmgr.cxx.rej
|
|
diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx hunspell-1.1.5/src/hunspell/affixmgr.hxx
|
|
--- hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/hunspell/affixmgr.hxx 2007-06-06 13:05:02.000000000 +0100
|
|
@@ -203,6 +203,7 @@
|
|
int process_pfx_tree_to_list();
|
|
int process_sfx_tree_to_list();
|
|
int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
|
|
+ void fill_unicode_equivalents();
|
|
};
|
|
|
|
#endif
|
|
diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx
|
|
--- hunspell-1.1.5.orig/src/hunspell/csutil.cxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/hunspell/csutil.cxx 2007-06-06 13:05:02.000000000 +0100
|
|
@@ -5090,6 +5090,10 @@
|
|
#ifndef OPENOFFICEORG
|
|
#ifndef MOZILLA_CLIENT
|
|
int initialize_utf_tbl() {
|
|
+
|
|
+ if (utf_tbl)
|
|
+ return 0;
|
|
+
|
|
utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));
|
|
if (utf_tbl) {
|
|
int j;
|
|
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx
|
|
--- hunspell-1.1.5.orig/src/parsers/textparser.cxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/parsers/textparser.cxx 2007-06-06 13:05:02.000000000 +0100
|
|
@@ -5,6 +5,7 @@
|
|
|
|
#include "../hunspell/csutil.hxx"
|
|
#include "textparser.hxx"
|
|
+#include <langinfo.h>
|
|
|
|
#ifndef W32
|
|
using namespace std;
|
|
@@ -46,22 +47,52 @@
|
|
|
|
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
|
|
|
|
-TextParser::TextParser() {
|
|
+TextParser::TextParser() : todict(NULL), fromdict(NULL) {
|
|
init((char *) NULL);
|
|
}
|
|
|
|
-TextParser::TextParser(const char * wordchars)
|
|
+extern char *changeenc(iconv_t cd, char *token);
|
|
+
|
|
+// Run token through the `todict` converter via changeenc() and hand back
+// whatever changeenc() returns.
+char *TextParser::todictenc(char *token)
+{
+    char *converted = changeenc(todict, token);
+    return converted;
+}
|
|
+
|
|
+// Run token through the `fromdict` converter via changeenc() and hand back
+// whatever changeenc() returns.
+char *TextParser::fromdictenc(char *token)
+{
+    char *converted = changeenc(fromdict, token);
+    return converted;
+}
|
|
+
|
|
+// Prepare the two iconv descriptors used to translate between the codeset
+// reported by nl_langinfo(CODESET) and the dictionary encoding `dictenc`.
+// Both descriptors stay NULL when either name is missing or when the two
+// names compare equal; a descriptor that fails to open is stored as NULL.
+void TextParser::openiconv(const char *dictenc)
+{
+    todict = NULL;
+    fromdict = NULL;
+
+    const char *srcenc = nl_langinfo(CODESET);
+    if (!dictenc || !srcenc) return;
+    if (strcmp(dictenc, srcenc) == 0) return;
+
+    todict = iconv_open(dictenc, srcenc);
+    if (todict == (iconv_t) -1)
+    {
+        todict = NULL;
+    }
+
+    fromdict = iconv_open(srcenc, dictenc);
+    if (fromdict == (iconv_t) -1)
+    {
+        fromdict = NULL;
+    }
+}
|
|
+
|
|
+TextParser::TextParser(const char * wordchars, const char *destenc)
|
|
{
|
|
init(wordchars);
|
|
+ openiconv(destenc);
|
|
}
|
|
|
|
-TextParser::TextParser(unsigned short * wordchars, int len)
|
|
+TextParser::TextParser(unsigned short * wordchars, int len, const char *destenc)
|
|
{
|
|
init(wordchars, len);
|
|
+ openiconv(destenc);
|
|
}
|
|
|
|
TextParser::~TextParser()
|
|
{
|
|
+ if (todict) iconv_close(todict);
|
|
+ if (fromdict) iconv_close(fromdict);
|
|
}
|
|
|
|
int TextParser::is_wordchar(char * w)
|
|
diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx
|
|
--- hunspell-1.1.5.orig/src/parsers/textparser.hxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/parsers/textparser.hxx 2007-06-06 13:05:02.000000000 +0100
|
|
@@ -20,12 +20,15 @@
|
|
*
|
|
*/
|
|
|
|
+#include <iconv.h>
|
|
+
|
|
class TextParser
|
|
{
|
|
|
|
protected:
|
|
void init(const char *);
|
|
void init(unsigned short * wordchars, int len);
|
|
+ void openiconv(const char *dictenc);
|
|
int wordcharacters[256]; // for detection of the word boundaries
|
|
char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines
|
|
int actual; // actual line
|
|
@@ -36,12 +39,14 @@
|
|
int next_char(char * line, int * pos);
|
|
unsigned short * wordchars_utf16;
|
|
int wclen;
|
|
+ iconv_t todict;
|
|
+ iconv_t fromdict;
|
|
|
|
public:
|
|
|
|
TextParser();
|
|
- TextParser(unsigned short * wordchars, int len);
|
|
- TextParser(const char * wc);
|
|
+ TextParser(unsigned short * wordchars, int len, const char *destenc);
|
|
+ TextParser(const char * wc, const char *destenc);
|
|
virtual ~TextParser();
|
|
|
|
void put_line(char * line);
|
|
@@ -53,7 +58,9 @@
|
|
int is_wordchar(char * w);
|
|
char * get_latin1(char * s);
|
|
char * next_char();
|
|
-
|
|
+
|
|
+ char * todictenc(char * in);
|
|
+ char * fromdictenc(char * in);
|
|
};
|
|
|
|
#endif
|
|
diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx
|
|
--- hunspell-1.1.5.orig/src/tools/hunspell.cxx 2007-06-06 13:04:45.000000000 +0100
|
|
+++ hunspell-1.1.5/src/tools/hunspell.cxx 2007-06-06 13:05:02.000000000 +0100
|
|
@@ -89,6 +89,8 @@
|
|
#endif
|
|
#endif
|
|
|
|
+#include <langinfo.h>
|
|
+
|
|
#define TEMPNAME "hunSPELL.bak"
|
|
|
|
extern char * mystrdup(const char * s);
|
|
@@ -130,9 +132,11 @@
|
|
|
|
TextParser * newParser(char * wordchars, int format, char * extension, Hunspell * pMS) {
|
|
TextParser * p = NULL;
|
|
- int utf8 = (strcmp(pMS->get_dic_encoding(), "UTF-8") == 0);
|
|
+ int utf8 = (strcmp(nl_langinfo(CODESET), "UTF-8") == 0);
|
|
|
|
if (utf8) {
|
|
+ if (initialize_utf_tbl()) return NULL;
|
|
+
|
|
switch (format) {
|
|
case FMT_LATEX: p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len); break;
|
|
case FMT_HTML: p = new HTMLParser(wordchars_utf16, wordchars_utf16_len); break;
|
|
@@ -173,9 +177,9 @@
|
|
}
|
|
if (!p) {
|
|
if (utf8) {
|
|
- p = new TextParser(wordchars_utf16, wordchars_utf16_len);
|
|
+ p = new TextParser(wordchars_utf16, wordchars_utf16_len, pMS->get_dic_encoding());
|
|
} else {
|
|
- p = new TextParser(wordchars);
|
|
+ p = new TextParser(wordchars, pMS->get_dic_encoding());
|
|
}
|
|
}
|
|
return p;
|
|
@@ -260,7 +264,7 @@
|
|
w = w->next;
|
|
free(r);
|
|
}
|
|
- fclose(dic);
|
|
+ return 0 == fclose(dic);
|
|
}
|
|
|
|
char * basename(char * s, char c) {
|
|
@@ -373,13 +377,15 @@
|
|
|
|
if (pos >= 0) {
|
|
parser->put_line(buf + pos);
|
|
- while ((token = parser->next_token())) {
|
|
+ while ((token = parser->todictenc(parser->next_token()))) {
|
|
switch (filter_mode) {
|
|
|
|
case BADWORD: {
|
|
- if (! pMS->spell(token)) {
|
|
+ int nRes = pMS->spell(token);
|
|
+ token = parser->fromdictenc(token);
|
|
+ if (!nRes) {
|
|
bad = 1;
|
|
- if (! printgood) fprintf(stdout,"%s\n", token);
|
|
+ if (!printgood) fprintf(stdout,"%s\n", token);
|
|
} else {
|
|
if (printgood) fprintf(stdout,"%s\n", token);
|
|
}
|
|
@@ -414,6 +420,8 @@
|
|
char ** wlst = NULL;
|
|
bad = 1;
|
|
int ns = pMS->suggest_auto(&wlst, token);
|
|
+ for (int j = 0; j < ns; j++)
|
|
+ wlst[j] = parser->fromdictenc(wlst[j]);
|
|
if (ns > 0) {
|
|
parser->change_token(wlst[0]);
|
|
if (filter_mode != AUTO2) {
|
|
@@ -451,6 +459,8 @@
|
|
} else {
|
|
char ** wlst = NULL;
|
|
int ns = pMS->suggest(&wlst, token);
|
|
+ for (int j = 0; j < ns; j++)
|
|
+ wlst[j] = parser->fromdictenc(wlst[j]);
|
|
if (ns == 0) {
|
|
fprintf(stdout,"# %s %d", token, parser->get_tokenpos() + pos);
|
|
} else {
|
|
@@ -554,6 +564,9 @@
|
|
getmaxyx(stdscr,y,x);
|
|
clear();
|
|
|
|
+ token = mystrdup(token);
|
|
+ token = parser->fromdictenc(token);
|
|
+
|
|
if (forbidden) printw(gettext("FORBIDDEN!"));
|
|
printw(gettext("\t%s\t\tFile: %s\n\n"), token,filename);
|
|
|
|
@@ -899,11 +912,13 @@
|
|
char * token;
|
|
int dialogexit = 0;
|
|
int info;
|
|
- while ((token=parser->next_token())) {
|
|
+ while ((token=parser->todictenc(parser->next_token()))) {
|
|
if (! pMS->spell(token, &info, NULL)) {
|
|
dialogscreen(parser, token, filename, (info & SPELL_FORBIDDEN), NULL, 0); // preview
|
|
char ** wlst = NULL;
|
|
int ns = pMS->suggest(&wlst,token);
|
|
+ for (int j = 0; j < ns; j++)
|
|
+ wlst[j] = parser->fromdictenc(wlst[j]);
|
|
if (ns==0) {
|
|
dialogexit = dialog(parser, pMS, token, filename, wlst, ns, (info & SPELL_FORBIDDEN));
|
|
} else {
|