move to ayaspell (bug #427433)
This commit is contained in:
parent
19f51d7af4
commit
3c15f33940
@ -1 +1 @@
|
|||||||
buckwalter_morphan_1_hunspell_patch.tar.gz
|
hunspell-ar_20080110.tar.gz
|
||||||
|
@ -1,453 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
|
|
||||||
# buckwalter2unicode.py - A script to convert transliterated Arabic
|
|
||||||
# (using the Buckwalter system) to Unicode.
|
|
||||||
#
|
|
||||||
# Version 0.2 - 15th September 2004
|
|
||||||
#
|
|
||||||
# Andrew Roberts (andyr [at] comp (dot) leeds [dot] ac (dot) uk)
|
|
||||||
#
|
|
||||||
# Project homepage: http://www.comp.leeds.ac.uk/andyr/software/
|
|
||||||
#
|
|
||||||
# Now, listen carefully...
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License
|
|
||||||
# along with this program; if not, write to the Free Software
|
|
||||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
#
|
|
||||||
|
|
||||||
import sys, getopt, codecs, os, re
|
|
||||||
|
|
||||||
# Declare a dictionary with Buckwalter's ASCII symbols as the keys, and
|
|
||||||
# their unicode equivalents as values.
|
|
||||||
|
|
||||||
buck2uni = {"'": u"\u0621", # hamza-on-the-line
|
|
||||||
"|": u"\u0622", # madda
|
|
||||||
">": u"\u0623", # hamza-on-'alif
|
|
||||||
"&": u"\u0624", # hamza-on-waaw
|
|
||||||
"<": u"\u0625", # hamza-under-'alif
|
|
||||||
"}": u"\u0626", # hamza-on-yaa'
|
|
||||||
"A": u"\u0627", # bare 'alif
|
|
||||||
"b": u"\u0628", # baa'
|
|
||||||
"p": u"\u0629", # taa' marbuuTa
|
|
||||||
"t": u"\u062A", # taa'
|
|
||||||
"v": u"\u062B", # thaa'
|
|
||||||
"j": u"\u062C", # jiim
|
|
||||||
"H": u"\u062D", # Haa'
|
|
||||||
"x": u"\u062E", # khaa'
|
|
||||||
"d": u"\u062F", # daal
|
|
||||||
"*": u"\u0630", # dhaal
|
|
||||||
"r": u"\u0631", # raa'
|
|
||||||
"z": u"\u0632", # zaay
|
|
||||||
"s": u"\u0633", # siin
|
|
||||||
"$": u"\u0634", # shiin
|
|
||||||
"S": u"\u0635", # Saad
|
|
||||||
"D": u"\u0636", # Daad
|
|
||||||
"T": u"\u0637", # Taa'
|
|
||||||
"Z": u"\u0638", # Zaa' (DHaa')
|
|
||||||
"E": u"\u0639", # cayn
|
|
||||||
"g": u"\u063A", # ghayn
|
|
||||||
"_": u"\u0640", # taTwiil
|
|
||||||
"f": u"\u0641", # faa'
|
|
||||||
"q": u"\u0642", # qaaf
|
|
||||||
"k": u"\u0643", # kaaf
|
|
||||||
"l": u"\u0644", # laam
|
|
||||||
"m": u"\u0645", # miim
|
|
||||||
"n": u"\u0646", # nuun
|
|
||||||
"h": u"\u0647", # haa'
|
|
||||||
"w": u"\u0648", # waaw
|
|
||||||
"Y": u"\u0649", # 'alif maqSuura
|
|
||||||
"y": u"\u064A", # yaa'
|
|
||||||
"F": u"\u064B", # fatHatayn
|
|
||||||
"N": u"\u064C", # Dammatayn
|
|
||||||
"K": u"\u064D", # kasratayn
|
|
||||||
"a": u"\u064E", # fatHa
|
|
||||||
"u": u"\u064F", # Damma
|
|
||||||
"i": u"\u0650", # kasra
|
|
||||||
"~": u"\u0651", # shaddah
|
|
||||||
"o": u"\u0652", # sukuun
|
|
||||||
"`": u"\u0670", # dagger 'alif
|
|
||||||
"{": u"\u0671", # waSla
|
|
||||||
}
|
|
||||||
|
|
||||||
# For a reverse transliteration (Unicode -> Buckwalter), a dictionary
|
|
||||||
# which is the reverse of the above buck2uni is essential.
|
|
||||||
|
|
||||||
uni2buck = {}
|
|
||||||
|
|
||||||
# Iterate through all the items in the buck2uni dict.
|
|
||||||
for (key, value) in buck2uni.iteritems():
|
|
||||||
# The value from buck2uni becomes a key in uni2buck, and vice
|
|
||||||
# versa for the keys.
|
|
||||||
uni2buck[value] = key
|
|
||||||
|
|
||||||
# Declare some global variables...
|
|
||||||
|
|
||||||
|
|
||||||
inFilename = "" # Name of filename containing input.
|
|
||||||
outFilename = "" # Name of filename to send the output
|
|
||||||
inEnc = "" # The text encoding of the input file
|
|
||||||
outEnc = "" # The text encoding for the output file
|
|
||||||
ignoreChars = "" # If lines begin with these symbols, ignore.
|
|
||||||
columnRange = "" # Holds columns numbers to transliterate.
|
|
||||||
delimiter = "" # Holds user-defined column delimiter.
|
|
||||||
reverse = 0 # When equal to 1, perform reverse transliteration, i.e.,
|
|
||||||
# Unicode -> Buckwalter.
|
|
||||||
|
|
||||||
# A function to print to screen the usage details of this script.
|
|
||||||
|
|
||||||
def usage():
|
|
||||||
print "Usage:", sys.argv[0], "-i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR"
|
|
||||||
print " -r -e INPUT_ENCODING, -E OUTPUT ENCODING]"
|
|
||||||
print " ", sys.argv[0], "-l"
|
|
||||||
print " ", sys.argv[0], "-h"
|
|
||||||
print ""
|
|
||||||
print " -i INFILE, --input=INFILE:"
|
|
||||||
print " Path to text file to be transliterated to Unicode."
|
|
||||||
print " -o OUTFILE, --output=OUTFILE:"
|
|
||||||
print " Path of file to output the newly transliterated text."
|
|
||||||
print " -e ENC, --input-encoding=ENC:"
|
|
||||||
print " Specify the text encoding of the source file. Default: latin_1."
|
|
||||||
print " -E ENC, --output-encoding=ENC:"
|
|
||||||
print " Specify the text encoding of the target file. Default: utf_8."
|
|
||||||
print " -g CHARS, --ignore-lines=CHARS:"
|
|
||||||
print " Will not transliterate lines that start with any of the CHARS"
|
|
||||||
print " given. E.g., -g #; will not alter lines starting with # or ;."
|
|
||||||
print " (May need to be -g \#\; on some platforms. See README.txt.)"
|
|
||||||
print " -c RANGE, --columns=RANGE:"
|
|
||||||
print " If in columns, select columns to apply transliteration. Can be"
|
|
||||||
print " comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3."
|
|
||||||
print " -d CHAR, --delimiter=CHAR:"
|
|
||||||
print " Specify the delimiter that defines the column if using the -c"
|
|
||||||
print " option above. Default is ' ' (space)."
|
|
||||||
print " -r, --reverse:"
|
|
||||||
print " Reverses the transliteration, i.e., Arabic to Buckwalter."
|
|
||||||
print " When used, it will change the default input encoding to utf_8 and"
|
|
||||||
print " output encoding to latin_1"
|
|
||||||
print " -l, --list-encodings:"
|
|
||||||
print " Displays all supported file encodings."
|
|
||||||
print " -h, --help:"
|
|
||||||
print " Displays this page."
|
|
||||||
print ""
|
|
||||||
|
|
||||||
# A function to print to screen all the available encodings supported by
|
|
||||||
# Python.
|
|
||||||
|
|
||||||
def displayEncodings():
|
|
||||||
print "Codec Aliases Languages"
|
|
||||||
print "ascii 646, us-ascii English"
|
|
||||||
print "cp037 IBM037, IBM039 English"
|
|
||||||
print "cp424 EBCDIC-CP-HE, IBM424 Hebrew"
|
|
||||||
print "cp437 437, IBM437 English"
|
|
||||||
print "cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe"
|
|
||||||
print "cp737 Greek"
|
|
||||||
print "cp775 IBM775 Baltic languages"
|
|
||||||
print "cp850 850, IBM850 Western Europe"
|
|
||||||
print "cp852 852, IBM852 Central and Eastern Europe"
|
|
||||||
print "cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
|
|
||||||
print "cp856 Hebrew"
|
|
||||||
print "cp857 857, IBM857 Turkish"
|
|
||||||
print "cp860 860, IBM860 Portuguese"
|
|
||||||
print "cp861 861, CP-IS, IBM861 Icelandic"
|
|
||||||
print "cp862 862, IBM862 Hebrew"
|
|
||||||
print "cp863 863, IBM863 Canadian"
|
|
||||||
print "cp864 IBM864 Arabic"
|
|
||||||
print "cp865 865, IBM865 Danish, Norwegian"
|
|
||||||
print "cp869 869, CP-GR, IBM869 Greek"
|
|
||||||
print "cp874 Thai"
|
|
||||||
print "cp875 Greek"
|
|
||||||
print "cp1006 Urdu"
|
|
||||||
print "cp1026 ibm1026 Turkish"
|
|
||||||
print "cp1140 ibm1140 Western Europe"
|
|
||||||
print "cp1250 windows-1250 Central and Eastern Europe"
|
|
||||||
print "cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
|
|
||||||
print "cp1252 windows-1252 Western Europe"
|
|
||||||
print "cp1253 windows-1253 Greek"
|
|
||||||
print "cp1254 windows-1254 Turkish"
|
|
||||||
print "cp1255 windows-1255 Hebrew"
|
|
||||||
print "cp1256 windows-1256 Arabic"
|
|
||||||
print "cp1257 windows-1257 Baltic languages"
|
|
||||||
print "cp1258 windows-1258 Vietnamese"
|
|
||||||
print "latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe"
|
|
||||||
print "iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe"
|
|
||||||
print "iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese"
|
|
||||||
print "iso8859_4 iso-8859-4, latin4, L4 Baltic languagues"
|
|
||||||
print "iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
|
|
||||||
print "iso8859_6 iso-8859-6, arabic Arabic"
|
|
||||||
print "iso8859_7 iso-8859-7, greek, greek8 Greek"
|
|
||||||
print "iso8859_8 iso-8859-8, hebrew Hebrew"
|
|
||||||
print "iso8859_9 iso-8859-9, latin5, L5 Turkish"
|
|
||||||
print "iso8859_10 iso-8859-10, latin6, L6 Nordic languages"
|
|
||||||
print "iso8859_13 iso-8859-13 Baltic languages"
|
|
||||||
print "iso8859_14 iso-8859-14, latin8, L8 Celtic languages"
|
|
||||||
print "iso8859_15 iso-8859-15 Western Europe"
|
|
||||||
print "koi8_r Russian"
|
|
||||||
print "koi8_u Ukrainian"
|
|
||||||
print "mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
|
|
||||||
print "mac_greek macgreek Greek"
|
|
||||||
print "mac_iceland maciceland Icelandic"
|
|
||||||
print "mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe"
|
|
||||||
print "mac_roman macroman Western Europe"
|
|
||||||
print "mac_turkish macturkish Turkish"
|
|
||||||
print "utf_16 U16, utf16 all languages"
|
|
||||||
print "utf_16_be UTF-16BE all languages (BMP only)"
|
|
||||||
print "utf_16_le UTF-16LE all languages (BMP only)"
|
|
||||||
print "utf_7 U7 all languages"
|
|
||||||
print "utf_8 U8, UTF, utf8 all languages"
|
|
||||||
|
|
||||||
def parseIgnoreString(string):
|
|
||||||
|
|
||||||
symbols = []
|
|
||||||
|
|
||||||
for char in string:
|
|
||||||
symbols.append(char)
|
|
||||||
|
|
||||||
return symbols
|
|
||||||
|
|
||||||
# Begin parsing the command-line arguments...
|
|
||||||
|
|
||||||
try:
|
|
||||||
(options, args) = getopt.getopt(sys.argv[1:], "i:o:e:E:g:c:d:rlh",
|
|
||||||
["input=","output=", "input-encoding=", "output-encoding=",
|
|
||||||
"ignore-lines=", "columns=", "delimiter=" "reverse", "list-encodings",
|
|
||||||
"help"])
|
|
||||||
|
|
||||||
except getopt.GetoptError:
|
|
||||||
# print help information and exit:
|
|
||||||
usage()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Loop over all arguments supplied by the user.
|
|
||||||
for (x, y) in options:
|
|
||||||
if x in ("-h", "--help"):
|
|
||||||
usage()
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
if x in ("-l", "--list-encodings"):
|
|
||||||
displayEncodings()
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
if x in ("-i", "--input"): inFilename = y
|
|
||||||
if x in ("-o", "--output"): outFilename = y
|
|
||||||
if x in ("-e", "--input-encoding"): inEnc= y
|
|
||||||
if x in ("-E", "--output-encoding"): outEnc= y
|
|
||||||
if x in ("-r", "--reverse"): reverse = 1
|
|
||||||
if x in ("-g", "--ignore-lines"): ignoreChars = y
|
|
||||||
if x in ("-c", "--columns"): columnRange = y
|
|
||||||
if x in ("-d", "--delimiter"):
|
|
||||||
delimiter = y
|
|
||||||
# Tabs come in off the command line from "\\t" to "\t". However,
|
|
||||||
# that's equivalent to "\\t" from python's point of view.
|
|
||||||
# Therefore replace any inputted "tabs" with proper tabs before
|
|
||||||
# proceeding.
|
|
||||||
delimiter = delimiter.replace("\\t", "\t")
|
|
||||||
# Do some error checking
|
|
||||||
if len(delimiter) > 1:
|
|
||||||
print >>sys.stderr, "Delimeter should only be a single character. Using first character" + delimiter[0]
|
|
||||||
delimiter = delimiter[0]
|
|
||||||
|
|
||||||
if buck2uni.get(delimiter):
|
|
||||||
print >> sys.stderr, "Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set."
|
|
||||||
print >> sys.stderr, "This will obviously cause much confusion as a delimiter!"
|
|
||||||
print >> sys.stderr, "Please try again. Aborting..."
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# If no delimiter was set then, set the default to " " (space)
|
|
||||||
if not delimiter:
|
|
||||||
delimiter = " "
|
|
||||||
|
|
||||||
# If user didn't specify the encoding of the input file, then revert to
|
|
||||||
# defaults. The defaults can depending on the direction of
|
|
||||||
# transliteration:
|
|
||||||
#
|
|
||||||
# Buckwalter -> Unicode, default = latin1
|
|
||||||
# Unicode -> Buckwalter, default = utf_8
|
|
||||||
|
|
||||||
|
|
||||||
if not inEnc:
|
|
||||||
if reverse:
|
|
||||||
inEnc = "utf_8"
|
|
||||||
else:
|
|
||||||
inEnc = "latin_1"
|
|
||||||
|
|
||||||
# Similarly, if user didn't specify the encoding of the output file,
|
|
||||||
# then revert to defaults. The defaults can depending on the direction
|
|
||||||
# of transliteration:
|
|
||||||
#
|
|
||||||
# Buckwalter -> Unicode, default = utf_8
|
|
||||||
# Unicode -> Buckwalter, default # = latin_1
|
|
||||||
|
|
||||||
if not outEnc:
|
|
||||||
if reverse:
|
|
||||||
outEnc = "latin_1"
|
|
||||||
else:
|
|
||||||
outEnc = "utf_8"
|
|
||||||
|
|
||||||
# Ok, let's get the files open!
|
|
||||||
|
|
||||||
# Providing a file for output was specified...
|
|
||||||
if outFilename:
|
|
||||||
try:
|
|
||||||
# Create a file object, set it to "write" mode using the
|
|
||||||
# specified output encoding.
|
|
||||||
outFile = codecs.open(outFilename, "w", outEnc)
|
|
||||||
|
|
||||||
except IOError, msg:
|
|
||||||
# A problem occurred when trying to open this file. Report to
|
|
||||||
# user...
|
|
||||||
print msg
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Script can not work without somewhere to store the transliteration.
|
|
||||||
# Exit.
|
|
||||||
else:
|
|
||||||
print "Must specify a file to use store the output! Aborting..."
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Providing a file for input was specified...
|
|
||||||
if inFilename:
|
|
||||||
try:
|
|
||||||
# Create a file object, set it to "read" mode using the
|
|
||||||
# specified input encoding.
|
|
||||||
inFile = codecs.open(inFilename, "r", inEnc)
|
|
||||||
|
|
||||||
except IOError, msg:
|
|
||||||
# A problem occurred when trying to open this file. Report to
|
|
||||||
# user...
|
|
||||||
print msg
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# This script requires a file to read from. Exit.
|
|
||||||
else:
|
|
||||||
print "Must specify a file to use as input! Aborting..."
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
def getColsFromRange(cRange):
|
|
||||||
|
|
||||||
columns = []
|
|
||||||
hyphenSearch = re.compile(r'-')
|
|
||||||
|
|
||||||
rangeElements = cRange.split(",")
|
|
||||||
|
|
||||||
for i in rangeElements:
|
|
||||||
# If it contains a hyphen (e.g., 1-3)
|
|
||||||
if hyphenSearch.search(i):
|
|
||||||
[start, end] = i.split("-")
|
|
||||||
columns = columns + range(int(start)-1,int(end))
|
|
||||||
else:
|
|
||||||
columns.append(int(i)-1)
|
|
||||||
|
|
||||||
return columns
|
|
||||||
|
|
||||||
# This function transliterates a given string. It checks the direction
|
|
||||||
# of the transliteration and then uses the appropriate dictionary. A
|
|
||||||
# transliterated string is returned.
|
|
||||||
|
|
||||||
def transliterate(inString, lineNumber):
|
|
||||||
out = ""
|
|
||||||
|
|
||||||
if columnRange:
|
|
||||||
columns = getColsFromRange(columnRange)
|
|
||||||
|
|
||||||
# Split the line on the delimiter
|
|
||||||
lineCols = inString.split(delimiter)
|
|
||||||
|
|
||||||
# Iterate over each column. If it's one of the ones in the range
|
|
||||||
# specified, then transliterate, otherwise just output column
|
|
||||||
# unchanged.
|
|
||||||
|
|
||||||
for i in range(len(lineCols)):
|
|
||||||
|
|
||||||
# If first column, then don't prefix the delimiter
|
|
||||||
if i == 0:
|
|
||||||
if i in columns:
|
|
||||||
out = transliterateString(lineCols[i])
|
|
||||||
else :
|
|
||||||
out = lineCols[i]
|
|
||||||
else :
|
|
||||||
if i in columns:
|
|
||||||
out = out + delimiter + transliterateString(lineCols[i])
|
|
||||||
else :
|
|
||||||
out = out + delimiter + lineCols[i]
|
|
||||||
|
|
||||||
else:
|
|
||||||
out = transliterateString(inString)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
def transliterateString(inString):
|
|
||||||
|
|
||||||
out = ""
|
|
||||||
|
|
||||||
# For normal Buckwalter -> Unicode transliteration..
|
|
||||||
if not reverse:
|
|
||||||
|
|
||||||
# Loop over each character in the string, inString.
|
|
||||||
for char in inString:
|
|
||||||
# Look up current char in the dictionary to get its
|
|
||||||
# respective value. If there is no match, e.g., chars like
|
|
||||||
# spaces, then just stick with the current char without any
|
|
||||||
# conversion.
|
|
||||||
out = out + buck2uni.get(char, char)
|
|
||||||
|
|
||||||
# Same as above, just in the other direction.
|
|
||||||
else:
|
|
||||||
|
|
||||||
for char in inString:
|
|
||||||
out = out + uni2buck.get(char, char)
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
#while 1:
|
|
||||||
# line = inFile.readline().strip()
|
|
||||||
# line = line.decode(inEnc)
|
|
||||||
# if not line:
|
|
||||||
# break
|
|
||||||
|
|
||||||
# process string
|
|
||||||
# outFile.write(transliterate(line) + os.linesep)
|
|
||||||
|
|
||||||
# Read in the lines of the input file.
|
|
||||||
lines = inFile.readlines()
|
|
||||||
|
|
||||||
currentLineNumber = 1
|
|
||||||
# Loop over each line
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
try:
|
|
||||||
# Transliterate the current line, and then write the output to
|
|
||||||
# file.
|
|
||||||
|
|
||||||
if not ignoreChars:
|
|
||||||
outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep)
|
|
||||||
else:
|
|
||||||
if line[0] in parseIgnoreString(ignoreChars):
|
|
||||||
outFile.write(line + " " + os.linesep)
|
|
||||||
else:
|
|
||||||
outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep)
|
|
||||||
|
|
||||||
currentLineNumber = currentLineNumber + 1
|
|
||||||
|
|
||||||
except UnicodeError, msg:
|
|
||||||
# A problem when writing occurred. Report to user...
|
|
||||||
print msg
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# All done! Better close the files used before terminating...
|
|
||||||
inFile.close()
|
|
||||||
outFile.close()
|
|
||||||
|
|
||||||
# ... and relax! :)
|
|
@ -1,14 +1,13 @@
|
|||||||
Name: hunspell-ar
|
Name: hunspell-ar
|
||||||
Summary: Arabic hunspell dictionaries
|
Summary: Arabic hunspell dictionaries
|
||||||
%define upstreamid 20060208
|
%define upstreamid 20080110
|
||||||
Version: 0.%{upstreamid}
|
Version: 0.%{upstreamid}
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Source0: http://hunspell.sourceforge.net/buckwalter_morphan_1_hunspell_patch.tar.gz
|
Source: http://downloads.sourceforge.net/ayaspell/hunspell-ar_%{upstreamid}.tar.gz
|
||||||
Source1: http://www.andy-roberts.net/software/buckwalter2unicode/buckwalter2unicode.py
|
|
||||||
Group: Applications/Text
|
Group: Applications/Text
|
||||||
URL: http://hunspell.sourceforge.net
|
URL: http://ayaspell.sourceforge.net/
|
||||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
||||||
License: GPL
|
License: GPLv2 or LGPLv2 or MPLv1.1
|
||||||
BuildArch: noarch
|
BuildArch: noarch
|
||||||
|
|
||||||
Requires: hunspell
|
Requires: hunspell
|
||||||
@ -17,33 +16,21 @@ Requires: hunspell
|
|||||||
Arabic (Egypt, Algeria, etc.) hunspell dictionaries
|
Arabic (Egypt, Algeria, etc.) hunspell dictionaries
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n buckwalter_morphan_1_hunspell_patch
|
%setup -q -n %{name}_%{upstreamid}
|
||||||
cp %{SOURCE1} convert2hunspell
|
|
||||||
chmod +x convert2hunspell/buckwalter2unicode.py
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
cd convert2hunspell
|
|
||||||
make
|
|
||||||
|
|
||||||
./buckwalter2unicode.py -i arabic_alias.dic -o ../ar_LB.dic
|
|
||||||
grep -v '^.FX' arabic_alias.aff > nonFX
|
|
||||||
grep '^.FX' arabic_alias.aff > FX.in
|
|
||||||
./buckwalter2unicode.py -i FX.in -d ' ' -c 4 -o FX
|
|
||||||
cat nonFX FX > ../ar_LB.aff
|
|
||||||
|
|
||||||
tr -d '\r' < ../doc/readme.txt > ../doc/readme.txt.new
|
|
||||||
mv -f ../doc/readme.txt.new ../doc/readme.txt
|
|
||||||
|
|
||||||
%install
|
%install
|
||||||
rm -rf $RPM_BUILD_ROOT
|
rm -rf $RPM_BUILD_ROOT
|
||||||
mkdir -p $RPM_BUILD_ROOT/%{_datadir}/myspell
|
mkdir -p $RPM_BUILD_ROOT/%{_datadir}/myspell
|
||||||
cp -p *.dic *.aff $RPM_BUILD_ROOT/%{_datadir}/myspell
|
cp -p ar.dic $RPM_BUILD_ROOT/%{_datadir}/myspell/ar_TN.dic
|
||||||
|
cp -p ar.aff $RPM_BUILD_ROOT/%{_datadir}/myspell/ar_TN.aff
|
||||||
|
|
||||||
pushd $RPM_BUILD_ROOT/%{_datadir}/myspell/
|
pushd $RPM_BUILD_ROOT/%{_datadir}/myspell/
|
||||||
ar_LB_aliases="ar_AE ar_BH ar_DZ ar_EG ar_IN ar_IQ ar_JO ar_KW ar_LY ar_MA ar_OM ar_QA ar_SA ar_SD ar_SY ar_TN ar_YE"
|
ar_TN_aliases="ar_AE ar_BH ar_DZ ar_EG ar_IN ar_IQ ar_JO ar_LB ar_KW ar_LY ar_MA ar_OM ar_QA ar_SA ar_SD ar_SY ar_YE"
|
||||||
for lang in $ar_LB_aliases; do
|
for lang in $ar_TN_aliases; do
|
||||||
ln -s ar_LB.aff $lang.aff
|
ln -s ar_TN.aff $lang.aff
|
||||||
ln -s ar_LB.dic $lang.dic
|
ln -s ar_TN.dic $lang.dic
|
||||||
done
|
done
|
||||||
popd
|
popd
|
||||||
|
|
||||||
@ -52,9 +39,12 @@ rm -rf $RPM_BUILD_ROOT
|
|||||||
|
|
||||||
%files
|
%files
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%doc gpl.txt doc/readme.txt convert2hunspell/README
|
%doc AUTHORS ChangeLog-ar COPYING README-ar THANKS
|
||||||
%{_datadir}/myspell/*
|
%{_datadir}/myspell/*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Apr 28 2008 Caolan McNamara <caolanm@redhat.com> - 0.20080110-1
|
||||||
|
- use the much lighter ayaspell data instead of Buckwalter
|
||||||
|
|
||||||
* Wed Jun 06 2007 Caolan McNamara <caolanm@redhat.com> - 0.20060208-1
|
* Wed Jun 06 2007 Caolan McNamara <caolanm@redhat.com> - 0.20060208-1
|
||||||
- initial version
|
- initial version
|
||||||
|
Loading…
Reference in New Issue
Block a user