1
2
3
4
5
6
7
8
9
10
11 from itertools import islice
12
13 import nltk.data
14 from nltk import defaultdict
15
16 from dictionary import dictionaryFor
17 from util import *
18
# Suffix-rewrite rules, keyed by part of speech.  Every rule is an
# (old_suffix, new_suffix) pair: a word-form ending in old_suffix yields a
# candidate base form by replacing that ending with new_suffix.
# The order of the pairs within each list is kept exactly as written, since
# code walking these lists may depend on it.  ADVERB has no rules.
MORPHOLOGICAL_SUBSTITUTIONS = {
    NOUN: [
        ('s', ''),
        ('ses', 's'),
        ('ves', 'f'),
        ('xes', 'x'),
        ('zes', 'z'),
        ('ches', 'ch'),
        ('shes', 'sh'),
        ('men', 'man'),
        ('ies', 'y'),
    ],
    VERB: [
        ('s', ''),
        ('ies', 'y'),
        ('es', 'e'),
        ('es', ''),
        ('ed', 'e'),
        ('ed', ''),
        ('ing', 'e'),
        ('ing', ''),
    ],
    ADJECTIVE: [
        ('er', ''),
        ('est', ''),
        ('er', 'e'),
        ('est', 'e'),
    ],
    ADVERB: [],
}
31
def morphy(form, pos=NOUN):
    '''Return the first base form found for a word-form, or None.

    Candidate base forms are produced by _morphy(form, pos); only the
    first candidate is returned.  The search works as follows: the word
    is first looked up in the exception list for this POS and, if it is
    there, all of the exception's base forms are identified.  Next, the
    word-form is reduced using the list of suffix substitutions for that
    POS: for every (old, new) pair of strings in that list, if the form
    ends with old, a new form is created by replacing old with new, and
    the search recurses on the new form.

    >>> morphy('dogs')
    'dog'
    >>> morphy('churches')
    'church'
    >>> morphy('aardwolves')
    'aardwolf'
    >>> morphy('abaci')
    'abacus'
    >>> morphy('hardrock', ADVERB)
    '''
    for base_form in _morphy(form, pos):
        # Only the first candidate is wanted; the generator is not
        # advanced any further.
        return base_form
    # No candidate at all for this form/POS combination.
    return None
58
76
77 exceptions = binarySearchFile(excfile, form)
78 if exceptions:
79 forms = exceptions[exceptions.find(' ')+1:-1].split()
80 for f in forms:
81 yield f
82 if pos == NOUN and form.endswith('ful'):
83 suffix = 'ful'
84 form = form[:-3]
85 else:
86 suffix = ''
87 for f in trySubstitutions(form, substitutions):
88 yield f + suffix
89
90
91
93 word = word.lower()
94 print '\n===================='
95 print 'Word is', word
96 print '===================='
97 pos_forms = defaultdict(set)
98
99 for pos in [NOUN, VERB, ADJECTIVE, ADVERB]:
100 for form in _morphy(word, pos=pos):
101 pos_forms[pos].add(form)
102 for pos in [NOUN, VERB, ADJECTIVE, ADVERB]:
103 if pos in pos_forms:
104 print '%s: ' % pos.capitalize(),
105 for f in pos_forms[pos]:
106 print f,
107 print
108 print '===================='
109
def demo():
    '''Show base forms for some sample words, then for words typed in.

    After the samples, words are read with the prompt 'Enter a word: '
    and handed to p() one by one; entering an empty line ends the demo.
    '''
    samples = ['dogs', 'churches', 'aardwolves', 'abaci', 'hardrock']
    for sample in samples:
        p(sample)

    # Two-argument iter(): keep calling the prompt until it returns ''.
    for word in iter(lambda: raw_input('Enter a word: '), ''):
        p(word)
117
# Names exported by a star-import of this module.
__all__ = ['demo', 'morphy', '_morphy']

# Run the interactive demo when the file is executed as a script.
if __name__ == '__main__':
    demo()
122