diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..79ab546 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/wn2ooo20050723.tgz diff --git a/EMPTY b/EMPTY deleted file mode 100644 index 0519ecb..0000000 --- a/EMPTY +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/mythes-en.python3.patch b/mythes-en.python3.patch new file mode 100644 index 0000000..bf894af --- /dev/null +++ b/mythes-en.python3.patch @@ -0,0 +1,1631 @@ +diff -ru mythes-en-3.0/wn2ooo/wn2ooo.py mythes-en-3.0.fixed/wn2ooo/wn2ooo.py +--- mythes-en-3.0/wn2ooo/wn2ooo.py 2005-07-23 23:21:20.000000000 +0100 ++++ mythes-en-3.0.fixed/wn2ooo/wn2ooo.py 2018-07-30 14:46:52.695201526 +0100 +@@ -65,7 +65,7 @@ + entry = getRelatedTerms(u, HYPERNYM, '') + try: + listpos = l.index(entry) +- except ValueError, e: ++ except ValueError as e: + l.append(entry) + return str.join("|", l) + +@@ -74,12 +74,12 @@ + for word in words: + l = [] + if c % 100 == 0: +- print >> sys.stderr, "Working on word %d" % c ++ print("Working on word %d" % c, file=sys.stderr) + for pos in [ADJ, N, V, ADV]: + try: + for s in pos[word].getSenses(): + l.append(s) +- except KeyError, e: ++ except KeyError as e: + #print >> sys.stderr, e + continue + syn_count = 0 +@@ -118,7 +118,7 @@ + syn_lines = "%s%s" % (syn_lines, more_generic_terms) + syn_count = syn_count + 1 + if syn_count > 0: +- print "%s|%d\n%s" % (word, syn_count, syn_lines) ++ print("%s|%d\n%s" % (word, syn_count, syn_lines)) + c = c + 1 + return + +@@ -132,40 +132,38 @@ + return s + + def main(): +- print "ISO8859-1" ++ print("ISO8859-1") + + words = {} + dic = Dictionary(ADJECTIVE, "adj") +- for w in dic.keys(): ++ for w in list(dic.keys()): + words[w] = None + + dic = Dictionary(NOUN, "noun") +- for w in dic.keys(): ++ for w in list(dic.keys()): + words[w] = None + + dic = Dictionary(VERB, "verb") +- for w in dic.keys(): ++ for w in list(dic.keys()): + words[w] = None + + dic = Dictionary(ADVERB, "adv") +- for w in dic.keys(): ++ for w in list(dic.keys()): + words[w] = None + +- words = words.keys() ++ words = list(words.keys()) + # tests: + #words = ['dog', 'house', 'nullipara'] + #words = ['absent', 'whistle stop'] + #words = ['war'] +- print >>sys.stderr, "Dictionaries contain %d words" % len(words) +- print >>sys.stderr, "Sorting..." +- words.sort(mycompare) ++ print("Dictionaries contain %d words" % len(words), file=sys.stderr) ++ print("Sorting...", file=sys.stderr) ++ words = sorted(words, key=mycompare) + printSynsForWords(words) + return + +-def mycompare(a, b): ++def mycompare(elem): + # stupid hack to make sure the list is sorted like Kevin's original list: +- a = a.replace(" ", "Z") +- b = b.replace(" ", "Z") +- return cmp(a, b) ++ return elem.replace(" ", "Z") + + main() +diff -ru mythes-en-3.0/wn2ooo/wordnet.py mythes-en-3.0.fixed/wn2ooo/wordnet.py +--- mythes-en-3.0/wn2ooo/wordnet.py 2005-07-23 23:21:16.000000000 +0100 ++++ mythes-en-3.0.fixed/wn2ooo/wordnet.py 2018-07-30 14:46:52.695201526 +0100 +@@ -44,7 +44,6 @@ + import string + import os + from os import environ +-from types import IntType, ListType, StringType, TupleType + + + # +@@ -212,15 +211,15 @@ + + def __init__(self, line): + """Initialize the word from a line of a WN POS file.""" +- tokens = string.split(line) +- ints = map(int, tokens[int(tokens[3]) + 4:]) +- self.form = string.replace(tokens[0], '_', ' ') ++ tokens = line.split() ++ ints = list(map(int, tokens[int(tokens[3]) + 4:])) ++ self.form = tokens[0].replace('_', ' ') + "Orthographic representation of the word." +- self.pos = _normalizePOS(tokens[1]) ++ self.pos = _normalizePOS(tokens[1]) + "Part of speech. One of NOUN, VERB, ADJECTIVE, ADVERB." +- self.taggedSenseCount = ints[1] ++ self.taggedSenseCount = ints[1] + "Number of senses that are tagged." +- self._synsetOffsets = ints[2:ints[0]+2] ++ self._synsetOffsets = ints[2:ints[0]+2] + + def getPointers(self, pointerType=None): + """Pointers connect senses and synsets, not words. +@@ -233,17 +232,17 @@ + raise self.getPointers.__doc__ + + def getSenses(self): +- """Return a sequence of senses. +- +- >>> N['dog'].getSenses() +- ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron}) +- """ +- if not hasattr(self, '_senses'): +- def getSense(offset, pos=self.pos, form=self.form): +- return getSynset(pos, offset)[form] +- self._senses = tuple(map(getSense, self._synsetOffsets)) +- del self._synsetOffsets +- return self._senses ++ """Return a sequence of senses. ++ ++ >>> N['dog'].getSenses() ++ ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron}) ++ """ ++ if not hasattr(self, '_senses'): ++ def getSense(offset, pos=self.pos, form=self.form): ++ return getSynset(pos, offset)[form] ++ self._senses = tuple(map(getSense, self._synsetOffsets)) ++ del self._synsetOffsets ++ return self._senses + + # Deprecated. Present for backwards compatability. + def senses(self): +@@ -255,70 +254,70 @@ + return self.getSense() + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'].isTagged() +- 1 +- """ +- return self.taggedSenseCount > 0 ++ """Return 1 if any sense is tagged. ++ ++ >>> N['dog'].isTagged() ++ 1 ++ """ ++ return self.taggedSenseCount > 0 + + def getAdjectivePositions(self): +- """Return a sequence of adjective positions that this word can +- appear in. These are elements of ADJECTIVE_POSITIONS. +- +- >>> ADJ['clear'].getAdjectivePositions() +- [None, 'predicative'] +- """ +- positions = {} +- for sense in self.getSenses(): +- positions[sense.position] = 1 +- return positions.keys() ++ """Return a sequence of adjective positions that this word can ++ appear in. These are elements of ADJECTIVE_POSITIONS. ++ ++ >>> ADJ['clear'].getAdjectivePositions() ++ [None, 'predicative'] ++ """ ++ positions = {} ++ for sense in self.getSenses(): ++ positions[sense.position] = 1 ++ return list(positions.keys()) + + adjectivePositions = getAdjectivePositions # backwards compatability + + def __cmp__(self, other): +- """ +- >>> N['cat'] < N['dog'] +- 1 +- >>> N['dog'] < V['dog'] +- 1 +- """ +- return _compareInstances(self, other, ('pos', 'form')) ++ """ ++ >>> N['cat'] < N['dog'] ++ 1 ++ >>> N['dog'] < V['dog'] ++ 1 ++ """ ++ return _compareInstances(self, other, ('pos', 'form')) + + def __str__(self): +- """Return a human-readable representation. +- +- >>> str(N['dog']) +- 'dog(n.)' +- """ +- abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'} +- return self.form + "(" + abbrs[self.pos] + ")" ++ """Return a human-readable representation. ++ ++ >>> str(N['dog']) ++ 'dog(n.)' ++ """ ++ abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'} ++ return self.form + "(" + abbrs[self.pos] + ")" + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getWord('dog', 'noun')". +- """ +- if ReadableRepresentations: +- return str(self) +- return "getWord" + `(self.form, self.pos)` +- ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getWord('dog', 'noun')". ++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "getWord" + repr((self.form, self.pos)) ++ + # + # Sequence protocol (a Word's elements are its Senses) + # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- return len(self.getSenses()) ++ return len(self.getSenses()) + + def __getitem__(self, index): +- return self.getSenses()[index] ++ return self.getSenses()[index] + + def __getslice__(self, i, j): +- return self.getSenses()[i:j] ++ return self.getSenses()[i:j] + + + class Synset: +@@ -356,157 +355,157 @@ + + def __init__(self, pos, offset, line): + "Initialize the synset from a line off a WN synset file." +- self.pos = pos ++ self.pos = pos + "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB." +- self.offset = offset ++ self.offset = offset + """integer offset into the part-of-speech file. Together + with pos, this can be used as a unique id.""" +- tokens = string.split(line[:string.index(line, '|')]) +- self.ssType = tokens[2] +- self.gloss = string.strip(line[string.index(line, '|') + 1:]) ++ tokens = line[:line.index('|')].split() ++ self.ssType = tokens[2] ++ self.gloss = line[line.index('|') + 1:].strip() + self.lexname = Lexname.lexnames[int(tokens[1])] +- (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16)) +- (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) +- if pos == VERB: +- (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) +- def extractVerbFrames(index, vfTuples): +- return tuple(map(lambda t:string.atoi(t[1]), filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))) +- senseVerbFrames = [] +- for index in range(1, len(self._senseTuples) + 1): +- senseVerbFrames.append(extractVerbFrames(index, vfTuples)) +- self._senseVerbFrames = senseVerbFrames +- self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) ++ (self._senseTuples, remainder) = _partition(tokens[4:], 2, int(tokens[3], 16)) ++ (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) ++ if pos == VERB: ++ (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) ++ def extractVerbFrames(index, vfTuples): ++ return tuple([int(t[1]) for t in list(filter(lambda t,i=index:int(t[2],16) in (0, i), vfTuples))]) ++ senseVerbFrames = [] ++ for index in range(1, len(self._senseTuples) + 1): ++ senseVerbFrames.append(extractVerbFrames(index, vfTuples)) ++ self._senseVerbFrames = senseVerbFrames ++ self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) + """A sequence of integers that index into + VERB_FRAME_STRINGS. These list the verb frames that any + Sense in this synset participates in. (See also + Sense.verbFrames.) Defined only for verbs.""" + + def getSenses(self): +- """Return a sequence of Senses. +- +- >>> N['dog'][0].getSenses() +- ('dog' in {noun: dog, domestic dog, Canis familiaris},) +- """ +- if not hasattr(self, '_senses'): +- def loadSense(senseTuple, verbFrames=None, synset=self): +- return Sense(synset, senseTuple, verbFrames) +- if self.pos == VERB: +- self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames)) +- del self._senseVerbFrames +- else: +- self._senses = tuple(map(loadSense, self._senseTuples)) +- del self._senseTuples +- return self._senses ++ """Return a sequence of Senses. ++ ++ >>> N['dog'][0].getSenses() ++ ('dog' in {noun: dog, domestic dog, Canis familiaris},) ++ """ ++ if not hasattr(self, '_senses'): ++ def loadSense(senseTuple, verbFrames=None, synset=self): ++ return Sense(synset, senseTuple, verbFrames) ++ if self.pos == VERB: ++ self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames)) ++ del self._senseVerbFrames ++ else: ++ self._senses = tuple(map(loadSense, self._senseTuples)) ++ del self._senseTuples ++ return self._senses + + senses = getSenses + + def getPointers(self, pointerType=None): +- """Return a sequence of Pointers. ++ """Return a sequence of Pointers. + + If pointerType is specified, only pointers of that type are + returned. In this case, pointerType should be an element of + POINTER_TYPES. +- +- >>> N['dog'][0].getPointers()[:5] +- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) +- >>> N['dog'][0].getPointers(HYPERNYM) +- (hypernym -> {noun: canine, canid},) +- """ +- if not hasattr(self, '_pointers'): +- def loadPointer(tuple, synset=self): +- return Pointer(synset.offset, tuple) +- self._pointers = tuple(map(loadPointer, self._pointerTuples)) +- del self._pointerTuples +- if pointerType == None: +- return self._pointers +- else: +- _requirePointerType(pointerType) +- return filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers) ++ ++ >>> N['dog'][0].getPointers()[:5] ++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) ++ >>> N['dog'][0].getPointers(HYPERNYM) ++ (hypernym -> {noun: canine, canid},) ++ """ ++ if not hasattr(self, '_pointers'): ++ def loadPointer(tuple, synset=self): ++ return Pointer(synset.offset, tuple) ++ self._pointers = tuple(map(loadPointer, self._pointerTuples)) ++ del self._pointerTuples ++ if pointerType == None: ++ return self._pointers ++ else: ++ _requirePointerType(pointerType) ++ return list(filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers)) + + pointers = getPointers # backwards compatability + + def getPointerTargets(self, pointerType=None): +- """Return a sequence of Senses or Synsets. +- ++ """Return a sequence of Senses or Synsets. ++ + If pointerType is specified, only targets of pointers of that + type are returned. In this case, pointerType should be an + element of POINTER_TYPES. +- +- >>> N['dog'][0].getPointerTargets()[:5] +- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] +- >>> N['dog'][0].getPointerTargets(HYPERNYM) +- [{noun: canine, canid}] +- """ +- return map(Pointer.target, self.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointerTargets()[:5] ++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] ++ >>> N['dog'][0].getPointerTargets(HYPERNYM) ++ [{noun: canine, canid}] ++ """ ++ return list(map(Pointer.target, self.getPointers(pointerType))) + + pointerTargets = getPointerTargets # backwards compatability + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'][0].isTagged() +- 1 +- >>> N['dog'][1].isTagged() +- 0 +- """ +- return len(filter(Sense.isTagged, self.getSenses())) > 0 ++ """Return 1 if any sense is tagged. ++ ++ >>> N['dog'][0].isTagged() ++ 1 ++ >>> N['dog'][1].isTagged() ++ 0 ++ """ ++ return len(list(filter(Sense.isTagged, self.getSenses()))) > 0 + + def __str__(self): +- """Return a human-readable representation. +- +- >>> str(N['dog'][0].synset) +- '{noun: dog, domestic dog, Canis familiaris}' +- """ +- return "{" + self.pos + ": " + string.joinfields(map(lambda sense:sense.form, self.getSenses()), ", ") + "}" ++ """Return a human-readable representation. ++ ++ >>> str(N['dog'][0].synset) ++ '{noun: dog, domestic dog, Canis familiaris}' ++ """ ++ return "{" + self.pos + ": " + string.joinfields([sense.form for sense in self.getSenses()], ", ") + "}" + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getSynset(pos, 1234)". +- """ +- if ReadableRepresentations: +- return str(self) +- return "getSynset" + `(self.pos, self.offset)` ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getSynset(pos, 1234)". ++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "getSynset" + repr((self.pos, self.offset)) + + def __cmp__(self, other): +- return _compareInstances(self, other, ('pos', 'offset')) ++ return _compareInstances(self, other, ('pos', 'offset')) + + # + # Sequence protocol (a Synset's elements are its senses). + # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- """ +- >>> len(N['dog'][0].synset) +- 3 +- """ +- return len(self.getSenses()) ++ """ ++ >>> len(N['dog'][0].synset) ++ 3 ++ """ ++ return len(self.getSenses()) + + def __getitem__(self, idx): +- """ +- >>> N['dog'][0].synset[0] == N['dog'][0] +- 1 +- >>> N['dog'][0].synset['dog'] == N['dog'][0] +- 1 +- >>> N['dog'][0].synset[N['dog']] == N['dog'][0] +- 1 +- >>> N['cat'][6] +- 'cat' in {noun: big cat, cat} +- """ +- senses = self.getSenses() +- if isinstance(idx, Word): +- idx = idx.form +- if isinstance(idx, StringType): +- idx = _index(idx, map(lambda sense:sense.form, senses)) or \ +- _index(idx, map(lambda sense:sense.form, senses), _equalsIgnoreCase) +- return senses[idx] ++ """ ++ >>> N['dog'][0].synset[0] == N['dog'][0] ++ 1 ++ >>> N['dog'][0].synset['dog'] == N['dog'][0] ++ 1 ++ >>> N['dog'][0].synset[N['dog']] == N['dog'][0] ++ 1 ++ >>> N['cat'][6] ++ 'cat' in {noun: big cat, cat} ++ """ ++ senses = self.getSenses() ++ if isinstance(idx, Word): ++ idx = idx.form ++ if isinstance(idx, str): ++ idx = _index(idx, [sense.form for sense in senses]) or \ ++ _index(idx, [sense.form for sense in senses], _equalsIgnoreCase) ++ return senses[idx] + + def __getslice__(self, i, j): +- return self.getSenses()[i:j] ++ return self.getSenses()[i:j] + + + class Sense: +@@ -527,7 +526,7 @@ + VERB_FRAME_STRINGS. These list the verb frames that this + Sense partipates in. Defined only for verbs. + +- >>> decide = V['decide'][0].synset # first synset for 'decide' ++ >>> decide = V['decide'][0].synset # first synset for 'decide' + >>> decide[0].verbFrames + (8, 2, 26, 29) + >>> decide[1].verbFrames +@@ -538,124 +537,124 @@ + + def __init__(sense, synset, senseTuple, verbFrames=None): + "Initialize a sense from a synset's senseTuple." +- # synset is stored by key (pos, synset) rather than object +- # reference, to avoid creating a circular reference between +- # Senses and Synsets that will prevent the vm from +- # garbage-collecting them. +- sense.pos = synset.pos ++ # synset is stored by key (pos, synset) rather than object ++ # reference, to avoid creating a circular reference between ++ # Senses and Synsets that will prevent the vm from ++ # garbage-collecting them. ++ sense.pos = synset.pos + "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB" +- sense.synsetOffset = synset.offset ++ sense.synsetOffset = synset.offset + "synset key. This is used to retrieve the sense." +- sense.verbFrames = verbFrames ++ sense.verbFrames = verbFrames + """A sequence of integers that index into + VERB_FRAME_STRINGS. These list the verb frames that this + Sense partipates in. Defined only for verbs.""" +- (form, idString) = senseTuple +- sense.position = None +- if '(' in form: +- index = string.index(form, '(') +- key = form[index + 1:-1] +- form = form[:index] +- if key == 'a': +- sense.position = ATTRIBUTIVE +- elif key == 'p': +- sense.position = PREDICATIVE +- elif key == 'ip': +- sense.position = IMMEDIATE_POSTNOMINAL +- else: +- raise "unknown attribute " + key +- sense.form = string.replace(form, '_', ' ') ++ (form, idString) = senseTuple ++ sense.position = None ++ if '(' in form: ++ index = form.index('(') ++ key = form[index + 1:-1] ++ form = form[:index] ++ if key == 'a': ++ sense.position = ATTRIBUTIVE ++ elif key == 'p': ++ sense.position = PREDICATIVE ++ elif key == 'ip': ++ sense.position = IMMEDIATE_POSTNOMINAL ++ else: ++ raise "unknown attribute " + key ++ sense.form = form.replace('_', ' ') + "orthographic representation of the Word this is a Sense of." + + def __getattr__(self, name): +- # see the note at __init__ about why 'synset' is provided as a +- # 'virtual' slot +- if name == 'synset': +- return getSynset(self.pos, self.synsetOffset) ++ # see the note at __init__ about why 'synset' is provided as a ++ # 'virtual' slot ++ if name == 'synset': ++ return getSynset(self.pos, self.synsetOffset) + elif name == 'lexname': + return self.synset.lexname +- else: +- raise AttributeError, name ++ else: ++ raise AttributeError(name) + + def __str__(self): +- """Return a human-readable representation. +- +- >>> str(N['dog']) +- 'dog(n.)' +- """ +- return `self.form` + " in " + str(self.synset) ++ """Return a human-readable representation. ++ ++ >>> str(N['dog']) ++ 'dog(n.)' ++ """ ++ return repr(self.form) + " in " + str(self.synset) + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getWord('dog', 'noun')". +- """ +- if ReadableRepresentations: +- return str(self) +- return "%s[%s]" % (`self.synset`, `self.form`) ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getWord('dog', 'noun')". ++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "%s[%s]" % (repr(self.synset), repr(self.form)) + + def getPointers(self, pointerType=None): +- """Return a sequence of Pointers. +- ++ """Return a sequence of Pointers. ++ + If pointerType is specified, only pointers of that type are + returned. In this case, pointerType should be an element of + POINTER_TYPES. +- +- >>> N['dog'][0].getPointers()[:5] +- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) +- >>> N['dog'][0].getPointers(HYPERNYM) +- (hypernym -> {noun: canine, canid},) +- """ +- senseIndex = _index(self, self.synset.getSenses()) +- def pointsFromThisSense(pointer, selfIndex=senseIndex): +- return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex +- return filter(pointsFromThisSense, self.synset.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointers()[:5] ++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) ++ >>> N['dog'][0].getPointers(HYPERNYM) ++ (hypernym -> {noun: canine, canid},) ++ """ ++ senseIndex = _index(self, self.synset.getSenses()) ++ def pointsFromThisSense(pointer, selfIndex=senseIndex): ++ return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex ++ return list(filter(pointsFromThisSense, self.synset.getPointers(pointerType))) + + pointers = getPointers # backwards compatability + + def getPointerTargets(self, pointerType=None): +- """Return a sequence of Senses or Synsets. +- ++ """Return a sequence of Senses or Synsets. ++ + If pointerType is specified, only targets of pointers of that + type are returned. In this case, pointerType should be an + element of POINTER_TYPES. +- +- >>> N['dog'][0].getPointerTargets()[:5] +- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] +- >>> N['dog'][0].getPointerTargets(HYPERNYM) +- [{noun: canine, canid}] +- """ +- return map(Pointer.target, self.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointerTargets()[:5] ++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] ++ >>> N['dog'][0].getPointerTargets(HYPERNYM) ++ [{noun: canine, canid}] ++ """ ++ return list(map(Pointer.target, self.getPointers(pointerType))) + + pointerTargets = getPointerTargets # backwards compatability + + def getSenses(self): +- return self, ++ return self, + + senses = getSenses # backwards compatability + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'][0].isTagged() +- 1 +- >>> N['dog'][1].isTagged() +- 0 +- """ +- word = self.word() +- return _index(self, word.getSenses()) < word.taggedSenseCount ++ """Return 1 if any sense is tagged. ++ ++ >>> N['dog'][0].isTagged() ++ 1 ++ >>> N['dog'][1].isTagged() ++ 0 ++ """ ++ word = self.word() ++ return _index(self, word.getSenses()) < word.taggedSenseCount + + def getWord(self): +- return getWord(self.form, self.pos) ++ return getWord(self.form, self.pos) + + word = getWord # backwards compatability + + def __cmp__(self, other): +- def senseIndex(sense, synset=self.synset): +- return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form) +- return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other)) ++ def senseIndex(sense, synset=self.synset): ++ return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form) ++ return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other)) + + + class Pointer: +@@ -670,21 +669,21 @@ + """ + + _POINTER_TYPE_TABLE = { +- '!': ANTONYM, ++ '!': ANTONYM, + '@': HYPERNYM, + '~': HYPONYM, +- '=': ATTRIBUTE, ++ '=': ATTRIBUTE, + '^': ALSO_SEE, + '*': ENTAILMENT, + '>': CAUSE, +- '$': VERB_GROUP, +- '#m': MEMBER_MERONYM, ++ '$': VERB_GROUP, ++ '#m': MEMBER_MERONYM, + '#s': SUBSTANCE_MERONYM, + '#p': PART_MERONYM, +- '%m': MEMBER_HOLONYM, ++ '%m': MEMBER_HOLONYM, + '%s': SUBSTANCE_HOLONYM, + '%p': PART_HOLONYM, +- '&': SIMILAR, ++ '&': SIMILAR, + '<': PARTICIPLE_OF, + '\\': PERTAINYM, + # New in wn 2.0: +@@ -698,57 +697,57 @@ + } + + def __init__(self, sourceOffset, pointerTuple): +- (type, offset, pos, indices) = pointerTuple +- # dnaber: try to adapt to WordNet 2.1: +- if type == "@i": +- type = "@" +- if type == "~i": +- type = "~" +- # /dnaber +- self.type = Pointer._POINTER_TYPE_TABLE[type] ++ (type, offset, pos, indices) = pointerTuple ++ # dnaber: try to adapt to WordNet 2.1: ++ if type == "@i": ++ type = "@" ++ if type == "~i": ++ type = "~" ++ # /dnaber ++ self.type = Pointer._POINTER_TYPE_TABLE[type] + """One of POINTER_TYPES.""" +- self.sourceOffset = sourceOffset +- self.targetOffset = int(offset) +- self.pos = _normalizePOS(pos) ++ self.sourceOffset = sourceOffset ++ self.targetOffset = int(offset) ++ self.pos = _normalizePOS(pos) + """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB""" +- indices = string.atoi(indices, 16) +- self.sourceIndex = indices >> 8 +- self.targetIndex = indices & 255 ++ indices = int(indices, 16) ++ self.sourceIndex = indices >> 8 ++ self.targetIndex = indices & 255 + + def getSource(self): +- synset = getSynset(self.pos, self.sourceOffset) +- if self.sourceIndex: +- return synset[self.sourceIndex - 1] +- else: +- return synset ++ synset = getSynset(self.pos, self.sourceOffset) ++ if self.sourceIndex: ++ return synset[self.sourceIndex - 1] ++ else: ++ return synset + + source = getSource # backwards compatability + + def getTarget(self): +- synset = getSynset(self.pos, self.targetOffset) +- if self.targetIndex: +- return synset[self.targetIndex - 1] +- else: +- return synset ++ synset = getSynset(self.pos, self.targetOffset) ++ if self.targetIndex: ++ return synset[self.targetIndex - 1] ++ else: ++ return synset + + target = getTarget # backwards compatability + + def __str__(self): +- return self.type + " -> " + str(self.target()) ++ return self.type + " -> " + str(self.target()) + + def __repr__(self): +- if ReadableRepresentations: +- return str(self) +- return "<" + str(self) + ">" ++ if ReadableRepresentations: ++ return str(self) ++ return "<" + str(self) + ">" + + def __cmp__(self, other): +- diff = _compareInstances(self, other, ('pos', 'sourceOffset')) +- if diff: +- return diff +- synset = self.source() +- def pointerIndex(sense, synset=synset): +- return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex'))) +- return cmp(pointerIndex(self), pointerIndex(other)) ++ diff = _compareInstances(self, other, ('pos', 'sourceOffset')) ++ if diff: ++ return diff ++ synset = self.source() ++ def pointerIndex(sense, synset=synset): ++ return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex'))) ++ return cmp(pointerIndex(self), pointerIndex(other)) + + + # Loading the lexnames +@@ -769,7 +768,7 @@ + + def setupLexnames(): + for l in open(WNSEARCHDIR+'/lexnames').readlines(): +- i,name,category = string.split(l) ++ i,name,category = l.split() + Lexname(name,PartsOfSpeech[int(category)-1]) + + setupLexnames() +@@ -802,59 +801,59 @@ + """ + + def __init__(self, pos, filenameroot): +- self.pos = pos ++ self.pos = pos + """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB""" +- self.indexFile = _IndexFile(pos, filenameroot) +- self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE) ++ self.indexFile = _IndexFile(pos, filenameroot) ++ self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE) + + def __repr__(self): +- dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'} +- if dictionaryVariables.get(self): +- return self.__module__ + "." + dictionaryVariables[self] +- return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos) ++ dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'} ++ if dictionaryVariables.get(self): ++ return self.__module__ + "." + dictionaryVariables[self] ++ return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos) + + def getWord(self, form, line=None): +- key = string.replace(string.lower(form), ' ', '_') +- pos = self.pos +- def loader(key=key, line=line, indexFile=self.indexFile): +- line = line or indexFile.get(key) +- return line and Word(line) +- word = _entityCache.get((pos, key), loader) +- if word: +- return word +- else: +- raise KeyError, "%s is not in the %s database" % (`form`, `pos`) ++ key = form.lower().replace(' ', '_') ++ pos = self.pos ++ def loader(key=key, line=line, indexFile=self.indexFile): ++ line = line or indexFile.get(key) ++ return line and Word(line) ++ word = _entityCache.get((pos, key), loader) ++ if word != None: ++ return word ++ else: ++ raise KeyError("%s is not in the %s database" % (repr(form), repr(pos))) + + def getSynset(self, offset): +- pos = self.pos +- def loader(pos=pos, offset=offset, dataFile=self.dataFile): +- return Synset(pos, offset, _lineAt(dataFile, offset)) +- return _entityCache.get((pos, offset), loader) ++ pos = self.pos ++ def loader(pos=pos, offset=offset, dataFile=self.dataFile): ++ return Synset(pos, offset, _lineAt(dataFile, offset)) ++ return _entityCache.get((pos, offset), loader) + + def _buildIndexCacheFile(self): +- self.indexFile._buildIndexCacheFile() ++ self.indexFile._buildIndexCacheFile() + + # + # Sequence protocol (a Dictionary's items are its Words) + # +- def __nonzero__(self): +- """Return false. (This is to avoid scanning the whole index file +- to compute len when a Dictionary is used in test position.) +- +- >>> N and 'true' +- 'true' +- """ +- return 1 ++ def __bool__(self): ++ """Return false. (This is to avoid scanning the whole index file ++ to compute len when a Dictionary is used in test position.) ++ ++ >>> N and 'true' ++ 'true' ++ """ ++ return 1 + + def __len__(self): +- """Return the number of index entries. +- +- >>> len(ADJ) +- 21435 +- """ +- if not hasattr(self, 'length'): +- self.length = len(self.indexFile) +- return self.length ++ """Return the number of index entries. ++ ++ >>> len(ADJ) ++ 21435 ++ """ ++ if not hasattr(self, 'length'): ++ self.length = len(self.indexFile) ++ return self.length + + def __getslice__(self, a, b): + results = [] +@@ -868,22 +867,22 @@ + return results + + def __getitem__(self, index): +- """If index is a String, return the Word whose form is +- index. If index is an integer n, return the Word +- indexed by the n'th Word in the Index file. +- +- >>> N['dog'] +- dog(n.) +- >>> N[0] +- 'hood(n.) +- """ +- if isinstance(index, StringType): +- return self.getWord(index) +- elif isinstance(index, IntType): +- line = self.indexFile[index] +- return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) +- else: +- raise TypeError, "%s is not a String or Int" % `index` ++ """If index is a String, return the Word whose form is ++ index. If index is an integer n, return the Word ++ indexed by the n'th Word in the Index file. ++ ++ >>> N['dog'] ++ dog(n.) ++ >>> N[0] ++ 'hood(n.) ++ """ ++ if isinstance(index, str): ++ return self.getWord(index) ++ elif isinstance(index, int): ++ line = self.indexFile[index] ++ return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) ++ else: ++ raise TypeError("%s is not a String or Int" % repr(index)) + + # + # Dictionary protocol +@@ -892,54 +891,54 @@ + # + + def get(self, key, default=None): +- """Return the Word whose form is _key_, or _default_. +- +- >>> N.get('dog') +- dog(n.) +- >>> N.get('inu') +- """ +- try: +- return self[key] +- except LookupError: +- return default ++ """Return the Word whose form is _key_, or _default_. ++ ++ >>> N.get('dog') ++ dog(n.) ++ >>> N.get('inu') ++ """ ++ try: ++ return self[key] ++ except LookupError: ++ return default + + def keys(self): +- """Return a sorted list of strings that index words in this +- dictionary.""" +- return self.indexFile.keys() ++ """Return a sorted list of strings that index words in this ++ dictionary.""" ++ return list(self.indexFile.keys()) + + def has_key(self, form): +- """Return true iff the argument indexes a word in this dictionary. +- +- >>> N.has_key('dog') +- 1 +- >>> N.has_key('inu') +- 0 +- """ +- return self.indexFile.has_key(form) ++ """Return true iff the argument indexes a word in this dictionary. ++ ++ >>> N.has_key('dog') ++ 1 ++ >>> N.has_key('inu') ++ 0 ++ """ ++ return form in self.indexFile + + # + # Testing + # + + def _testKeys(self): +- """Verify that index lookup can find each word in the index file.""" +- print "Testing: ", self +- file = open(self.indexFile.file.name, _FILE_OPEN_MODE) +- counter = 0 +- while 1: +- line = file.readline() +- if line == '': break +- if line[0] != ' ': +- key = string.replace(line[:string.find(line, ' ')], '_', ' ') +- if (counter % 1000) == 0: +- print "%s..." % (key,), +- import sys +- sys.stdout.flush() +- counter = counter + 1 +- self[key] +- file.close() +- print "done." ++ """Verify that index lookup can find each word in the index file.""" ++ print("Testing: ", self) ++ file = open(self.indexFile.file.name, _FILE_OPEN_MODE) ++ counter = 0 ++ while 1: ++ line = file.readline() ++ if line == '': break ++ if line[0] != ' ': ++ key = string.replace(line[:string.find(line, ' ')], '_', ' ') ++ if (counter % 1000) == 0: ++ print("%s..." % (key,), end=' ') ++ import sys ++ sys.stdout.flush() ++ counter = counter + 1 ++ self[key] ++ file.close() ++ print("done.") + + + class _IndexFile: +@@ -947,69 +946,69 @@ + Sequence and Dictionary interface to a sorted index file.""" + + def __init__(self, pos, filenameroot): +- self.pos = pos +- self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE) +- self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset) +- self.rewind() +- self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx") +- try: +- import shelve +- self.indexCache = shelve.open(self.shelfname, 'r') +- except: +- pass ++ self.pos = pos ++ self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE) ++ self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset) ++ self.rewind() ++ self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx") ++ try: ++ import shelve ++ self.indexCache = shelve.open(self.shelfname, 'r') ++ except: ++ pass + + def rewind(self): +- self.file.seek(0) +- while 1: +- offset = self.file.tell() +- line = self.file.readline() +- if (line[0] != ' '): +- break +- self.nextIndex = 0 +- self.nextOffset = offset ++ self.file.seek(0) ++ while 1: ++ offset = self.file.tell() ++ line = self.file.readline() ++ if (line[0] != ' '): ++ break ++ self.nextIndex = 0 ++ self.nextOffset = offset + + # + # Sequence protocol (an _IndexFile's items are its lines) + # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- if hasattr(self, 'indexCache'): +- return len(self.indexCache) +- self.rewind() +- lines = 0 +- while 1: +- line = self.file.readline() +- if line == "": +- break +- lines = lines + 1 +- return lines ++ if hasattr(self, 'indexCache'): ++ return len(self.indexCache) ++ self.rewind() ++ lines = 0 ++ while 1: ++ line = self.file.readline() ++ if line == "": ++ break ++ lines = lines + 1 ++ return lines + +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __getitem__(self, index): +- if isinstance(index, StringType): +- if hasattr(self, 'indexCache'): +- return self.indexCache[index] +- return binarySearchFile(self.file, index, self.offsetLineCache, 8) +- elif isinstance(index, IntType): +- if hasattr(self, 'indexCache'): +- return self.get(self.keys[index]) +- if index < self.nextIndex: +- self.rewind() +- while self.nextIndex <= index: +- self.file.seek(self.nextOffset) +- line = self.file.readline() +- if line == "": +- raise IndexError, "index out of range" +- self.nextIndex = self.nextIndex + 1 +- self.nextOffset = self.file.tell() +- return line +- else: +- raise TypeError, "%s is not a String or Int" % `index` +- ++ if isinstance(index, str): ++ if hasattr(self, 'indexCache'): ++ return self.indexCache[index] ++ return binarySearchFile(self.file, index, self.offsetLineCache, 8) ++ elif isinstance(index, int): ++ if hasattr(self, 'indexCache'): ++ return self.get(self.keys[index]) ++ if index < self.nextIndex: ++ self.rewind() ++ while self.nextIndex <= index: ++ self.file.seek(self.nextOffset) ++ line = self.file.readline() ++ if line == "": ++ raise IndexError("index out of range") ++ self.nextIndex = self.nextIndex + 1 ++ self.nextOffset = self.file.tell() ++ return line ++ else: ++ raise TypeError("%s is not a String or Int" % repr(index)) ++ + # + # Dictionary protocol + # +@@ -1017,62 +1016,62 @@ + # + + def get(self, key, default=None): +- try: +- return self[key] +- except LookupError: +- return default ++ try: ++ return self[key] ++ except LookupError: ++ return default + + def keys(self): +- if hasattr(self, 'indexCache'): +- keys = self.indexCache.keys() +- keys.sort() +- return keys +- else: +- keys = [] +- self.rewind() +- while 1: +- line = self.file.readline() +- if not line: break ++ if hasattr(self, 'indexCache'): ++ keys = list(self.indexCache.keys()) ++ keys.sort() ++ return keys ++ else: ++ keys = [] ++ self.rewind() ++ while 1: ++ line = self.file.readline() ++ if not line: break + key = line.split(' ', 1)[0] +- keys.append(key.replace('_', ' ')) +- return keys ++ keys.append(key.replace('_', ' ')) ++ return keys + + def has_key(self, key): +- key = key.replace(' ', '_') # test case: V['haze over'] +- if hasattr(self, 'indexCache'): +- return self.indexCache.has_key(key) +- return self.get(key) != None ++ key = key.replace(' ', '_') # test case: V['haze over'] ++ if hasattr(self, 'indexCache'): ++ return key in self.indexCache ++ return self.get(key) != None + + # + # Index file + # + + def _buildIndexCacheFile(self): +- import shelve +- import os +- print "Building %s:" % (self.shelfname,), +- tempname = self.shelfname + ".temp" +- try: +- indexCache = shelve.open(tempname) +- self.rewind() +- count = 0 +- while 1: +- offset, line = self.file.tell(), self.file.readline() +- if not line: break +- key = line[:string.find(line, ' ')] +- if (count % 1000) == 0: +- print "%s..." % (key,), +- import sys +- sys.stdout.flush() +- indexCache[key] = line +- count = count + 1 +- indexCache.close() +- os.rename(tempname, self.shelfname) +- finally: +- try: os.remove(tempname) +- except: pass +- print "done." +- self.indexCache = shelve.open(self.shelfname, 'r') ++ import shelve ++ import os ++ print("Building %s:" % (self.shelfname,), end=' ') ++ tempname = self.shelfname + ".temp" ++ try: ++ indexCache = shelve.open(tempname) ++ self.rewind() ++ count = 0 ++ while 1: ++ offset, line = self.file.tell(), self.file.readline() ++ if not line: break ++ key = line[:string.find(line, ' ')] ++ if (count % 1000) == 0: ++ print("%s..." % (key,), end=' ') ++ import sys ++ sys.stdout.flush() ++ indexCache[key] = line ++ count = count + 1 ++ indexCache.close() ++ os.rename(tempname, self.shelfname) ++ finally: ++ try: os.remove(tempname) ++ except: pass ++ print("done.") ++ self.indexCache = shelve.open(self.shelfname, 'r') + + + # +@@ -1099,20 +1098,20 @@ + + def _requirePointerType(pointerType): + if pointerType not in POINTER_TYPES: +- raise TypeError, `pointerType` + " is not a pointer type" ++ raise TypeError(repr(pointerType) + " is not a pointer type") + return pointerType + + def _compareInstances(a, b, fields): + """"Return -1, 0, or 1 according to a comparison first by type, + then by class, and finally by each of fields.""" # " <- for emacs + if not hasattr(b, '__class__'): +- return cmp(type(a), type(b)) ++ return cmp(type(a), type(b)) + elif a.__class__ != b.__class__: +- return cmp(a.__class__, b.__class__) ++ return cmp(a.__class__, b.__class__) + for field in fields: +- diff = cmp(getattr(a, field), getattr(b, field)) +- if diff: +- return diff ++ diff = cmp(getattr(a, field), getattr(b, field)) ++ if diff: ++ return diff + return 0 + + def _equalsIgnoreCase(a, b): +@@ -1123,21 +1122,21 @@ + >>> _equalsIgnoreCase('dOg', 'DOG') + 1 + """ +- return a == b or string.lower(a) == string.lower(b) ++ return a == b or a.lower() == b.lower() + + # + # File utilities + # + def _dataFilePathname(filenameroot): + if os.name in ('dos', 'nt'): +- path = os.path.join(WNSEARCHDIR, filenameroot + ".dat") ++ path = os.path.join(WNSEARCHDIR, filenameroot + ".dat") + if os.path.exists(path): + return path + return os.path.join(WNSEARCHDIR, "data." + filenameroot) + + def _indexFilePathname(filenameroot): + if os.name in ('dos', 'nt'): +- path = os.path.join(WNSEARCHDIR, filenameroot + ".idx") ++ path = os.path.join(WNSEARCHDIR, filenameroot + ".idx") + if os.path.exists(path): + return path + return os.path.join(WNSEARCHDIR, "index." + filenameroot) +@@ -1154,30 +1153,30 @@ + #if count > 20: + # raise "infinite loop" + lastState = start, end +- middle = (start + end) / 2 +- if cache.get(middle): +- offset, line = cache[middle] +- else: +- file.seek(max(0, middle - 1)) +- if middle > 0: +- file.readline() +- offset, line = file.tell(), file.readline() +- if currentDepth < cacheDepth: +- cache[middle] = (offset, line) ++ middle = (start + end) / 2 ++ if cache.get(middle): ++ offset, line = cache[middle] ++ else: ++ file.seek(max(0, middle - 1)) ++ if middle > 0: ++ file.readline() ++ offset, line = file.tell(), file.readline() ++ if currentDepth < cacheDepth: ++ cache[middle] = (offset, line) + #print start, middle, end, offset, line, +- if offset > end: +- assert end != middle - 1, "infinite loop" +- end = middle - 1 +- elif line[:keylen] == key:# and line[keylen + 1] == ' ': +- return line ++ if offset > end: ++ assert end != middle - 1, "infinite loop" ++ end = middle - 1 ++ elif line[:keylen] == key:# and line[keylen + 1] == ' ': ++ return line + #elif offset == end: + # return None +- elif line > key: +- assert end != middle - 1, "infinite loop" +- end = middle - 1 +- elif line < key: +- start = offset + len(line) - 1 +- currentDepth = currentDepth + 1 ++ elif line > key: ++ assert end != middle - 1, "infinite loop" ++ end = middle - 1 ++ elif line < key: ++ start = offset + len(line) - 1 ++ currentDepth = currentDepth + 1 + thisState = start, end + if lastState == thisState: + # detects the condition where we're searching past the end +@@ -1206,12 +1205,12 @@ + """ + index = 0 + for element in sequence: +- value = element +- if keyfn: +- value = keyfn(value) +- if (not testfn and value == key) or (testfn and testfn(value, key)): +- return index +- index = index + 1 ++ value = element ++ if keyfn: ++ value = keyfn(value) ++ if (not testfn and value == key) or (testfn and testfn(value, key)): ++ return index ++ index = index + 1 + return None + + def _partition(sequence, size, count): +@@ -1224,7 +1223,7 @@ + + partitions = [] + for index in range(0, size * count, size): +- partitions.append(sequence[index:index + size]) ++ partitions.append(sequence[index:index + size]) + return (partitions, sequence[size * count:]) + + +@@ -1269,49 +1268,49 @@ + but the two implementations aren't directly comparable.""" + + def __init__(this, capacity): +- this.capacity = capacity +- this.clear() ++ this.capacity = capacity ++ this.clear() + + def clear(this): +- this.values = {} +- this.history = {} +- this.oldestTimestamp = 0 +- this.nextTimestamp = 1 ++ this.values = {} ++ this.history = {} ++ this.oldestTimestamp = 0 ++ this.nextTimestamp = 1 + + def removeOldestEntry(this): +- while this.oldestTimestamp < this.nextTimestamp: +- if this.history.get(this.oldestTimestamp): +- key = this.history[this.oldestTimestamp] +- del this.history[this.oldestTimestamp] +- del this.values[key] +- return +- this.oldestTimestamp = this.oldestTimestamp + 1 ++ while this.oldestTimestamp < this.nextTimestamp: ++ if this.history.get(this.oldestTimestamp): ++ key = this.history[this.oldestTimestamp] ++ del this.history[this.oldestTimestamp] ++ del this.values[key] ++ return ++ this.oldestTimestamp = this.oldestTimestamp + 1 + + def setCapacity(this, capacity): +- if capacity == 0: +- this.clear() +- else: +- this.capacity = capacity +- while len(this.values) > this.capacity: +- this.removeOldestEntry() ++ if capacity == 0: ++ this.clear() ++ else: ++ this.capacity = capacity ++ while len(this.values) > this.capacity: ++ this.removeOldestEntry() + + def get(this, key, loadfn=None): +- value = None +- if this.values: +- pair = this.values.get(key) +- if pair: +- (value, timestamp) = pair +- del this.history[timestamp] +- if value == None: +- value = loadfn and loadfn() +- if this.values != None: +- timestamp = this.nextTimestamp +- this.nextTimestamp = this.nextTimestamp + 1 +- this.values[key] = (value, timestamp) +- this.history[timestamp] = key +- if len(this.values) > this.capacity: +- this.removeOldestEntry() +- return value ++ value = None ++ if this.values: ++ pair = this.values.get(key) ++ if pair: ++ (value, timestamp) = pair ++ del this.history[timestamp] ++ if value == None: ++ value = loadfn and loadfn() ++ if this.values != None: ++ timestamp = this.nextTimestamp ++ this.nextTimestamp = this.nextTimestamp + 1 ++ this.values[key] = (value, timestamp) ++ this.history[timestamp] = key ++ if len(this.values) > this.capacity: ++ this.removeOldestEntry() ++ return value + + + class _NullCache: +@@ -1319,10 +1318,10 @@ + LRUCache implements), but doesn't store any values.""" + + def clear(): +- pass ++ pass + + def get(this, key, loadfn=None): +- return loadfn and loadfn() ++ return loadfn and loadfn() + + + DEFAULT_CACHE_CAPACITY = 1000 +@@ -1335,7 +1334,7 @@ + def enableCache(): + """Enable the entity cache.""" + if not isinstance(_entityCache, LRUCache): +- _entityCache = _LRUCache(size) ++ _entityCache = _LRUCache(size) + + def clearCache(): + """Clear the entity cache.""" +@@ -1373,36 +1372,36 @@ + _POSNormalizationTable = {} + _POStoDictionaryTable = {} + for pos, abbreviations in ( +- (NOUN, "noun n n."), +- (VERB, "verb v v."), +- (ADJECTIVE, "adjective adj adj. a s"), +- (ADVERB, "adverb adv adv. r")): +- tokens = string.split(abbreviations) +- for token in tokens: +- _POSNormalizationTable[token] = pos +- _POSNormalizationTable[string.upper(token)] = pos ++ (NOUN, "noun n n."), ++ (VERB, "verb v v."), ++ (ADJECTIVE, "adjective adj adj. a s"), ++ (ADVERB, "adverb adv adv. r")): ++ tokens = abbreviations.split() ++ for token in tokens: ++ _POSNormalizationTable[token] = pos ++ _POSNormalizationTable[token.upper()] = pos + for dict in Dictionaries: +- _POSNormalizationTable[dict] = dict.pos +- _POStoDictionaryTable[dict.pos] = dict ++ _POSNormalizationTable[dict] = dict.pos ++ _POStoDictionaryTable[dict.pos] = dict + + _initializePOSTables() + + def _normalizePOS(pos): + norm = _POSNormalizationTable.get(pos) + if norm: +- return norm +- raise TypeError, `pos` + " is not a part of speech type" ++ return norm ++ raise TypeError(repr(pos) + " is not a part of speech type") + + def _dictionaryFor(pos): + pos = _normalizePOS(pos) + dict = _POStoDictionaryTable.get(pos) + if dict == None: +- raise RuntimeError, "The " + `pos` + " dictionary has not been created" ++ raise RuntimeError("The " + repr(pos) + " dictionary has not been created") + return dict + + def buildIndexFiles(): + for dict in Dictionaries: +- dict._buildIndexCacheFile() ++ dict._buildIndexCacheFile() + + + # +@@ -1412,7 +1411,7 @@ + def _testKeys(): + #This is slow, so don't do it as part of the normal test procedure. + for dictionary in Dictionaries: +- dictionary._testKeys() ++ dictionary._testKeys() + + def _test(reset=0): + import doctest, wordnet diff --git a/mythes-en.spec b/mythes-en.spec new file mode 100644 index 0000000..caa3948 --- /dev/null +++ b/mythes-en.spec @@ -0,0 +1,120 @@ +Name: mythes-en +Summary: English thesaurus +Version: 3.0 +Release: 23%{?dist} +Source: http://www.danielnaber.de/wn2ooo/wn2ooo20050723.tgz +URL: http://www.danielnaber.de/wn2ooo/ +BuildRequires: python3-devel +BuildRequires: perl-interpreter +BuildRequires: wordnet = %{version} +# License BSD is for the th_gen_idx.pl file +# License Artistic Clarified is for python files +License: BSD and Artistic clarified +BuildArch: noarch +Requires: mythes +Supplements: (mythes and langpacks-en) + +Patch0: mythes-en.python3.patch + +%description +English thesaurus. + +%prep +%setup -q -c %{name}-%{version} +%patch0 -p1 -b .python3 + +%build +export WNHOME=%{_datadir}/wordnet-%{version} +python3 wn2ooo/wn2ooo.py > th_en_US_v2.dat +cat th_en_US_v2.dat | perl wn2ooo/th_gen_idx.pl > th_en_US_v2.idx + + +%install +mkdir -p $RPM_BUILD_ROOT/%{_datadir}/mythes +cp -p th_en_US_v2.* $RPM_BUILD_ROOT/%{_datadir}/mythes + +pushd $RPM_BUILD_ROOT/%{_datadir}/mythes/ +en_US_aliases="en_AG en_AU en_BS en_BW en_BZ en_CA en_DK en_GB en_GH en_IE en_IN en_JM en_MW en_NA en_NG en_NZ en_PH en_SG en_TT en_ZA en_ZM en_ZW" +for lang in $en_US_aliases; do + ln -s th_en_US_v2.idx "th_"$lang"_v2.idx" + ln -s th_en_US_v2.dat "th_"$lang"_v2.dat" +done +popd + + +%files +%doc wn2ooo/LICENSE_th_gen_idx.txt wn2ooo/README.txt +%{_datadir}/mythes/* + +%changelog +* Tue Aug 14 2018 Parag Nemade - 3.0-23 +- Resolves:rh#1615554: mythes-en FTBFS for missing BR:python3-devel + +* Mon Jul 30 2018 Caolán McNamara - 3.0-22 +- Related: rhbz#1604911 convert python2 to python3, verified + that output th_en_US_v2.dat and th_en_US_v2.idx are identical + before and after + +* Mon Jul 30 2018 Parag Nemade - 3.0-21 +- Try to correct the license + +* Thu Feb 08 2018 Fedora Release Engineering - 3.0-20 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + +* Wed Jul 26 2017 Fedora Release Engineering - 3.0-19 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + +* Fri Feb 10 2017 Fedora Release Engineering - 3.0-18 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + +* Fri Feb 19 2016 Parag Nemade - 3.0-17 +- Add Supplements: tag for langpacks naming guidelines +- Clean the specfile to follow current packaging guidelines + +* Thu Feb 04 2016 Fedora Release Engineering - 3.0-16 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild + +* Wed Jun 17 2015 Fedora Release Engineering - 3.0-15 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Sat Jun 07 2014 Fedora Release Engineering - 3.0-14 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Sat Aug 03 2013 Fedora Release Engineering - 3.0-13 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + +* Thu Feb 14 2013 Fedora Release Engineering - 3.0-12 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Fri Jul 20 2012 Fedora Release Engineering - 3.0-11 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Thu Apr 12 2012 Caolán McNamara - 3.0-10 +- add some aliases + +* Fri Jan 13 2012 Fedora Release Engineering - 3.0-9 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + +* Tue Feb 08 2011 Fedora Release Engineering - 3.0-8 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Wed Apr 07 2010 Caolán McNamara - 3.0-7 +- clarify licence of tools + +* Sat Apr 03 2010 Caolán McNamara - 3.0-6 +- mythes now owns /usr/share/mythes + +* Sat Jul 25 2009 Fedora Release Engineering - 3.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild + +* Fri Jun 12 2009 Caolán McNamara - 3.0-4 +- extend coverage + +* Wed Jun 10 2009 Caolán McNamara - 3.0-3 +- rebuild against wordnet package + +* Wed Feb 25 2009 Fedora Release Engineering - 3.0-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild + +* Wed Nov 28 2007 Caolán McNamara - 3.0-1 +- initial version diff --git a/sources b/sources new file mode 100644 index 0000000..0711133 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA512 (wn2ooo20050723.tgz) = 113cb02ae101c3c5d62deb19dbdbc3a9769d4a64cb40a89136240a3335d082015b5d3e57c58de9db9ca8c034001b5b2049a10272be34118e7b398765de3eda6b