From 0a1b5e574277412ef8e938c776d557c84b7c1d22 Mon Sep 17 00:00:00 2001
From: Caolan McNamara <caolanm@fedoraproject.org>
Date: Tue, 22 May 2007 11:42:41 +0000
Subject: [PATCH] add 8bit dictionary word chars to the unicode list

---
 hunspell-1.1.5-missingheaders.patch | 203 ----------------------------
 hunspell-1.1.5.encoding.patch       | 131 ++++++++++++++----
 hunspell.spec                       |   6 +-
 3 files changed, 106 insertions(+), 234 deletions(-)
 delete mode 100644 hunspell-1.1.5-missingheaders.patch

diff --git a/hunspell-1.1.5-missingheaders.patch b/hunspell-1.1.5-missingheaders.patch
deleted file mode 100644
index 85685ba..0000000
--- a/hunspell-1.1.5-missingheaders.patch
+++ /dev/null
@@ -1,203 +0,0 @@
---- /dev/null	2007-03-16 11:16:34.774869017 +0000
-+++ hunspell-1.1.5/src/tools/munch.h	2007-03-20 09:19:49.000000000 +0000
-@@ -0,0 +1,121 @@
-+/* munch header file */
-+
-+#define MAX_LN_LEN    200
-+#define MAX_WD_LEN    200
-+#define MAX_PREFIXES  256
-+#define MAX_SUFFIXES  256
-+#define MAX_ROOTS      20
-+#define MAX_WORDS     5000
-+ 
-+#define ROTATE_LEN      5
-+ 
-+#define ROTATE(v,q) \
-+   (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
-+
-+#define SET_SIZE      256
-+
-+#define XPRODUCT  (1 << 0)
-+
-+/* the affix table entry */
-+
-+struct affent
-+{
-+    char *  appnd;
-+    char *  strip;
-+    short   appndl;
-+    short   stripl;
-+    char    achar;
-+    char    xpflg;   
-+    short   numconds;
-+    char    conds[SET_SIZE];
-+};
-+
-+
-+struct affixptr
-+{
-+    struct affent * aep;
-+    int		    num;
-+};
-+
-+/* the prefix and suffix table */
-+int	numpfx;		/* Number of prefixes in table */
-+int     numsfx;		/* Number of suffixes in table */
-+
-+/* the prefix table */
-+struct affixptr          ptable[MAX_PREFIXES];
-+
-+/* the suffix table */
-+struct affixptr          stable[MAX_SUFFIXES];
-+
-+
-+/* data structure to store results of lookups */
-+struct matches
-+{
-+    struct hentry *	hashent;	/* hash table entry */
-+    struct affent *	prefix;		/* Prefix used, or NULL */
-+    struct affent *	suffix;		/* Suffix used, or NULL */
-+};
-+
-+int    numroots;	          /* number of root words found */
-+struct matches  roots[MAX_ROOTS]; /* list of root words found */
-+
-+/* hashing stuff */
-+
-+struct hentry
-+{
-+  char * word;
-+  char * affstr;
-+  struct hentry * next;
-+  int keep;
-+};
-+
-+ 
-+int             tablesize;
-+struct hentry * tableptr;
-+
-+/* unmunch stuff */
-+
-+int    numwords;	          /* number of words found */
-+struct dwords
-+{
-+  char * word;
-+  int pallow;
-+};
-+
-+struct dwords  wlist[MAX_WORDS]; /* list words found */
-+
-+
-+/* the routines */
-+
-+int parse_aff_file(FILE* afflst);
-+
-+void encodeit(struct affent * ptr, char * cs);
-+
-+int load_tables(FILE * wrdlst);
-+
-+int hash(const char *);
-+
-+int add_word(char *);
-+
-+struct hentry * lookup(const char *);
-+
-+void aff_chk (const char * word, int len);
-+
-+void pfx_chk (const char * word, int len, struct affent* ep, int num);
-+
-+void suf_chk (const char * word, int len, struct affent * ep, int num, 
-+	      struct affent * pfxent, int cpflag);
-+
-+void add_affix_char(struct hentry * hent, char ac);
-+
-+int expand_rootword(const char *, int, const char*, int);
-+
-+void pfx_add (const char * word, int len, struct affent* ep, int num);
-+
-+void suf_add (const char * word, int len, struct affent * ep, int num);
-+
-+char * mystrsep(char ** stringp, const char delim);
-+
-+char * mystrdup(const char * s);
-+
-+void mychomp(char * s);
---- /dev/null	2007-03-16 11:16:34.774869017 +0000
-+++ hunspell-1.1.5/src/tools/unmunch.h	2007-03-20 09:19:50.000000000 +0000
-@@ -0,0 +1,76 @@
-+/* unmunch header file */
-+
-+#define MAX_LN_LEN    200
-+#define MAX_WD_LEN    200
-+#define MAX_PREFIXES  256
-+#define MAX_SUFFIXES  256
-+#define MAX_WORDS     5000
-+ 
-+#define ROTATE_LEN      5
-+ 
-+#define ROTATE(v,q) \
-+   (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
-+
-+#define SET_SIZE      256
-+
-+#define XPRODUCT  (1 << 0)
-+
-+/* the affix table entry */
-+
-+struct affent
-+{
-+    char *  appnd;
-+    char *  strip;
-+    short   appndl;
-+    short   stripl;
-+    char    achar;
-+    char    xpflg;   
-+    short   numconds;
-+    char    conds[SET_SIZE];
-+};
-+
-+
-+struct affixptr
-+{
-+    struct affent * aep;
-+    int		    num;
-+};
-+
-+/* the prefix and suffix table */
-+int	numpfx;		/* Number of prefixes in table */
-+int     numsfx;		/* Number of suffixes in table */
-+
-+/* the prefix table */
-+struct affixptr          ptable[MAX_PREFIXES];
-+
-+/* the suffix table */
-+struct affixptr          stable[MAX_SUFFIXES];
-+
-+
-+int    numwords;	          /* number of words found */
-+struct dwords
-+{
-+  char * word;
-+  int pallow;
-+};
-+
-+struct dwords  wlist[MAX_WORDS]; /* list words found */
-+
-+
-+/* the routines */
-+
-+int parse_aff_file(FILE* afflst);
-+
-+void encodeit(struct affent * ptr, char * cs);
-+
-+int expand_rootword(const char *, int, const char*, int);
-+
-+void pfx_add (const char * word, int len, struct affent* ep, int num);
-+
-+void suf_add (const char * word, int len, struct affent * ep, int num);
-+
-+char * mystrsep(char ** stringp, const char delim);
-+
-+char * mystrdup(const char * s);
-+
-+void mychomp(char * s);
diff --git a/hunspell-1.1.5.encoding.patch b/hunspell-1.1.5.encoding.patch
index 44dc772..c8d7166 100644
--- a/hunspell-1.1.5.encoding.patch
+++ b/hunspell-1.1.5.encoding.patch
@@ -1,6 +1,101 @@
+diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx hunspell-1.1.5/src/hunspell/affixmgr.cxx
+--- hunspell-1.1.5.orig/src/hunspell/affixmgr.cxx	2007-03-12 11:38:23.000000000 +0000
++++ hunspell-1.1.5/src/hunspell/affixmgr.cxx	2007-05-22 12:35:27.000000000 +0100
+@@ -19,6 +19,8 @@
+ 
+ #include "csutil.hxx"
+ 
++#include <iconv.h>
++
+ #ifndef MOZILLA_CLIENT
+ #ifndef W32
+ using namespace std;
+@@ -230,6 +232,61 @@
+   checknum=0;
+ }
+ 
++// Re-encode token via cd into a fresh zero-filled buffer; when cd is set, token is freed and NULL means failure.
++char *changeenc(iconv_t cd, char *token)
++{
++        if (!cd || !token) return token;
++
++        iconv (cd, NULL, NULL, NULL, NULL); // return cd to its initial conversion state
++
++        char *inptr = token;
++        size_t insize = strlen(token);
++
++        size_t avail = (insize + 1) * 3;
++        char *output = (char*)calloc(avail, 1);
++        char *wrptr = output;
++
++        // A failed allocation is reported like a failed conversion instead of crashing.
++        size_t nconv = output ? iconv (cd, &inptr, &insize, &wrptr, &avail) : (size_t) -1;
++        free(token);
++
++        if (nconv == (size_t) -1)
++        {
++                free(output);
++                output = NULL;
++        }
++        return output;
++}
++
++// Fill the empty UCS-2 IGNORE/WORDCHARS lists from their 8-bit forms and hand them to flag_qsort.
++void AffixMgr::fill_unicode_equivalents()
++{
++    if ((!ignorechars_utf16_len || !wordchars_utf16_len) && (ignorechars || wordchars))
++    {
++	iconv_t to_utf16 = iconv_open("UCS2", encoding);
++	if (to_utf16 == 0 || to_utf16 == (iconv_t) -1)
++		return;
++	// changeenc() yields NULL when conversion fails, so each list is NULL-checked before use.
++	unsigned short *p;
++	if (!ignorechars_utf16_len && ignorechars && strlen(ignorechars))
++	{
++		char *tmp = mystrdup(ignorechars);
++		ignorechars_utf16 = (unsigned short*)changeenc(to_utf16, tmp);
++		for (p = ignorechars_utf16; p && *p; ++p);
++		ignorechars_utf16_len = p ? p - ignorechars_utf16 : 0;
++		if (ignorechars_utf16) flag_qsort(ignorechars_utf16, 0, ignorechars_utf16_len);
++	}
++	if (!wordchars_utf16_len && wordchars && strlen(wordchars))
++	{
++		char *tmp = mystrdup(wordchars);
++		wordchars_utf16 = (unsigned short*)changeenc(to_utf16, tmp);
++		for (p = wordchars_utf16; p && *p; ++p);
++		wordchars_utf16_len = p ? p - wordchars_utf16 : 0;
++		if (wordchars_utf16) flag_qsort(wordchars_utf16, 0, wordchars_utf16_len);
++	}
++	iconv_close(to_utf16); // the descriptor is only needed for the conversions above
++    }
++}
+ 
+ // read in aff file and build up prefix and suffix entry objects 
+ int  AffixMgr::parse_file(const char * affpath)
+@@ -659,6 +716,9 @@
+         breaktable[0] = mystrdup("-");
+         numbreak = 1;
+     }
++
++    fill_unicode_equivalents();
++
+     return 0;
+ }
+ 
+diff -ru hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx hunspell-1.1.5/src/hunspell/affixmgr.hxx
+--- hunspell-1.1.5.orig/src/hunspell/affixmgr.hxx	2007-03-12 11:42:52.000000000 +0000
++++ hunspell-1.1.5/src/hunspell/affixmgr.hxx	2007-05-22 12:00:14.000000000 +0100
+@@ -203,6 +203,7 @@
+   int process_pfx_tree_to_list();
+   int process_sfx_tree_to_list();
+   int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
++  void fill_unicode_equivalents();
+ };
+ 
+ #endif
 diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell/csutil.cxx
 --- hunspell-1.1.5.orig/src/hunspell/csutil.cxx	2007-03-12 12:01:56.000000000 +0000
-+++ hunspell-1.1.5/src/hunspell/csutil.cxx	2007-05-21 13:31:46.000000000 +0100
++++ hunspell-1.1.5/src/hunspell/csutil.cxx	2007-05-22 12:21:30.000000000 +0100
 @@ -5090,6 +5090,10 @@
  #ifndef OPENOFFICEORG
  #ifndef MOZILLA_CLIENT
@@ -14,7 +109,7 @@ diff -ru hunspell-1.1.5.orig/src/hunspell/csutil.cxx hunspell-1.1.5/src/hunspell
      int j;
 diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parsers/textparser.cxx
 --- hunspell-1.1.5.orig/src/parsers/textparser.cxx	2007-03-05 12:59:53.000000000 +0000
-+++ hunspell-1.1.5/src/parsers/textparser.cxx	2007-05-21 13:31:46.000000000 +0100
++++ hunspell-1.1.5/src/parsers/textparser.cxx	2007-05-22 12:02:24.000000000 +0100
 @@ -5,6 +5,7 @@
  
  #include "../hunspell/csutil.hxx"
@@ -23,7 +118,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
  
  #ifndef W32
  using namespace std;
-@@ -46,22 +47,76 @@
+@@ -46,22 +47,52 @@
  
  #define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
  
@@ -33,31 +128,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
  }
  
 -TextParser::TextParser(const char * wordchars)
-+static char *changeenc(iconv_t cd, char *token)
-+{
-+	if (!cd || !token) return token;
-+	
-+	iconv (cd, NULL, NULL, NULL, NULL);
-+
-+	char *inptr = token;
-+	size_t insize = strlen(token);
-+
-+	size_t avail = (insize + 1) * 3;
-+	char *wrptr = (char*)malloc(avail);
-+	char *output = wrptr;
-+
-+	memset(wrptr, 0, avail);
-+	size_t nconv = iconv (cd, &inptr, &insize, &wrptr, &avail);
-+	free(token);
-+
-+        if (nconv == (size_t) -1)
-+        {
-+        	free(output);
-+		output = NULL;
-+	}
-+
-+	return output;
-+}
++extern char *changeenc(iconv_t cd, char *token);
 +
 +char *TextParser::todictenc(char *token)
 +{
@@ -105,7 +176,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.cxx hunspell-1.1.5/src/parse
  int TextParser::is_wordchar(char * w)
 diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parsers/textparser.hxx
 --- hunspell-1.1.5.orig/src/parsers/textparser.hxx	2007-01-19 01:01:07.000000000 +0000
-+++ hunspell-1.1.5/src/parsers/textparser.hxx	2007-05-21 13:31:46.000000000 +0100
++++ hunspell-1.1.5/src/parsers/textparser.hxx	2007-05-22 11:29:52.000000000 +0100
 @@ -20,12 +20,15 @@
   *
   */
@@ -152,7 +223,7 @@ diff -ru hunspell-1.1.5.orig/src/parsers/textparser.hxx hunspell-1.1.5/src/parse
  #endif
 diff -ru hunspell-1.1.5.orig/src/tools/hunspell.cxx hunspell-1.1.5/src/tools/hunspell.cxx
 --- hunspell-1.1.5.orig/src/tools/hunspell.cxx	2007-05-21 13:31:55.000000000 +0100
-+++ hunspell-1.1.5/src/tools/hunspell.cxx	2007-05-21 13:31:46.000000000 +0100
++++ hunspell-1.1.5/src/tools/hunspell.cxx	2007-05-22 11:15:33.000000000 +0100
 @@ -89,6 +89,8 @@
  #endif
  #endif
diff --git a/hunspell.spec b/hunspell.spec
index f9675da..ede9db8 100644
--- a/hunspell.spec
+++ b/hunspell.spec
@@ -1,7 +1,7 @@
 Name:      hunspell
 Summary:   Hunspell is a spell checker and morphological analyzer library
 Version:   1.1.5.3
-Release:   2%{?dist}
+Release:   3%{?dist}
 Source:    http://downloads.sourceforge.net/%{name}/hunspell-1.1.5-3.tar.gz
 Group:     System Environment/Libraries
 URL:       http://hunspell.sourceforge.net/
@@ -82,6 +82,10 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/pkgconfig/hunspell.pc
 
 %changelog
+* Tue May 22 2007 Caolan McNamara <caolanm@redhat.com> - 1.1.5.3-3
+- Resolves: rhbz#240696 extend encoding patch to promote and add
+  dictionary 8bit WORDCHARS to the ucs-2 word char list
+
 * Mon May 21 2007 Caolan McNamara <caolanm@redhat.com> - 1.1.5.3-2
 - Resolves: rhbz#240696 add hunspell-1.1.5.encoding.patch