1
2
3
4
5
6
7
8
9
10
11
12
13 import types
14 from cache import entityCache
15
16 import nltk.data
17 from nltk.internals import deprecated
18
19 from util import *
20
22 """
A Dictionary contains all the Words in a given part of speech. Four
dictionaries, named N, V, ADJ, and ADV, are bound by default in
__init__.py.
26
27 Indexing a dictionary by a string retrieves the word named by that
28 string, e.g. dict['dog']. Indexing by an integer n retrieves the
29 nth word, e.g. dict[0]. Access by an arbitrary integer is very
30 slow except in the special case where the words are accessed
31 sequentially; this is to support the use of dictionaries as the
32 range of a for statement and as the sequence argument to map and filter.
33
34 >>> N['dog']
35 dog(n.)
36 """
37
39 """
40 @type pos: C{string}
41 @param pos: This L{Dictionary}'s part of speech ('noun', 'verb' etc.)
42 @type filenameroot: C{string}
43 @param filenameroot: filename of the relevant Wordnet dictionary file
44 """
45 self.pos = pos
46 self._filenameroot = filenameroot
47 self._loaded = False
48
56
58 self.load()
59 dictionaryVariables = {}
60
61 if dictionaryVariables.get(self):
62 return self.__module__ + "." + dictionaryVariables[self]
63
64 return "<%s.%s instance for %s>" % \
65 (self.__module__, "Dictionary", self.pos)
66
67
68 @deprecated("Use Dictionary.word() instead.")
69 - def getWord(self, form, line=None):
71
def word(self, form, line=None):
    """
    Look up the L{Word} with the given form in this dictionary.

    @type form: C{string}
    @param form: word string e.g. 'dog'
    @type line: C{string}
    @param line: appropriate line sourced from the index file (optional)
    @return: The L{Word} object with the supplied form, if present.
    @raise KeyError: if the cache lookup yields no (truthy) entry for
        C{form}.
    """
    self.load()
    # Normalise the form into the lookup key: lower-case, with spaces
    # replaced by underscores.
    key = form.lower().replace(' ', '_')
    pos = self.pos

    # The values the loader needs are bound as argument defaults, so it
    # can be invoked without arguments.
    # NOTE(review): presumably entityCache.get() only calls the loader on
    # a cache miss -- confirm against the cache module.
    def loader(key=key, line=line, indexFile=self.indexFile):
        from synset import Word
        # Prefer a caller-supplied index line; otherwise fetch it by key.
        line = line or indexFile.get(key)
        # Pass a falsy line straight through so the caller can detect
        # the miss instead of building a Word from nothing.
        return line and Word(line)

    word = entityCache.get((pos, key), loader)

    if not word:
        raise KeyError("%s is not in the %s database"
                       % (repr(form), repr(pos)))
    return word
93
94
95 @deprecated("Use Dictionary.word() instead.")
97 return synset(self, offset)
98
100 """
101 @type offset: C{int}
102 @param offset: integer offset into a Wordnet file, at which the
103 desired L{Synset} can be found.
104
105 @return: The relevant L{Synset}, if present.
106 """
107
108 self.load()
109 def loader(pos=self.pos, offset=offset, dataFile=self.dataFile):
110 from synset import Synset
111 dataFile.seek(offset)
112 line = dataFile.readline()
113 return Synset(pos, offset, line)
114
115 return entityCache.get((self.pos, offset), loader)
116
120
122 """
123 >>> N and 'true'
124 'true'
125 """
126 self.load()
127 return 1
128
130 """
131 Return the number of index entries.
132
133 >>> len(ADJ)
134 21435
135 """
136 self.load()
137 if not hasattr(self, 'length'):
138 self.length = len(self.indexFile)
139
140 return self.length
141
143 self.load()
144 results = []
145
146 if type(a) == type('') and type(b) == type(''):
147 raise NotImplementedError()
148
149 elif type(a) == type(1) and type(b) == type(1):
150 for i in range(a, b):
151 results.append(self[i])
152
153 else:
154 raise TypeError
155
156 return results
157
159 """
If index is a String, return the Word whose form is
index. If index is an integer n, return the n'th Word
in the index file.
163
164 >>> N['dog']
165 dog(n.)
166 >>> N[0]
167 'hood(n.)
168 """
169 self.load()
170 if type(index) in types.StringTypes:
171 return self.word(index)
172
173 elif type(index) == types.IntType:
174 line = self.indexFile[index]
175 return self.word(string.replace(line[:string.find(line, ' ')], '_', ' '), line)
176
177 else:
178 raise TypeError, "%s is not a String or Int" % `index`
179
181 self.load()
182 return iter(self.keys())
183
187
def get(self, key, default=None):
    """
    Return the Word whose form is key, or default.

    >>> N.get('dog')
    dog(n.)

    @type key: C{string}
    @param key: the string form of a L{Word} e.g. 'dog'
    @type default: L{Word}
    @param default: An optional L{Word} to return if no entry can be found
        with the supplied key.
    @return: The L{Word} whose form is given by 'key', or C{default} when
        indexing this dictionary by 'key' raises a C{LookupError}.
    """
    self.load()
    try:
        return self[key]
    except LookupError:
        # Only lookup failures (KeyError / IndexError) fall back to the
        # default; any other exception raised by indexing propagates.
        return default
208
210 """
211 @return: A sorted list of strings that index words in this
212 dictionary.
213 """
214 self.load()
215 return self.indexFile.keys()
216
218 """
219 Checks if the supplied argument is an index into this dictionary.
220
221 >>> N.has_key('dog')
222 1
223 >>> N.has_key('inu')
224 0
225
226 @type form: C{string}
227 @param form: a word string e.g. 'dog'
228 @return: true iff the argument indexes a word in this dictionary.
229 """
230 self.load()
231 return self.indexFile.has_key(form)
232
233
234
236
237 self.load()
238 print "Testing: ", self
239 file = open(self.indexFile.file.name, _FILE_OPEN_MODE)
240 counter = 0
241
242 while 1:
243 line = file.readline()
244
245 if line == '': break
246
247 if line[0] != ' ':
248 key = string.replace(line[:string.find(line, ' ')], '_', ' ')
249
250 if (counter % 1000) == 0:
251 print "%s..." % (key,),
252 import sys
253 sys.stdout.flush()
254
255 counter = counter + 1
256 self[key]
257
258 file.close()
259 print "done."
260
261
262
# One Dictionary instance per part of speech. Each part-of-speech constant
# is passed as both constructor arguments.
N, V, ADJ, ADV = (
    Dictionary(NOUN, NOUN),
    Dictionary(VERB, VERB),
    Dictionary(ADJECTIVE, ADJECTIVE),
    Dictionary(ADVERB, ADVERB),
)

# Lookup table from part-of-speech constant to its Dictionary instance.
Dictionaries = {
    NOUN: N,
    VERB: V,
    ADJECTIVE: ADJ,
    ADVERB: ADV,
}
269
271 """
272 Return the dictionary for the supplied part of speech.
273
274 @type pos: C{string}
275 @param pos: The part of speech of the desired dictionary.
276
277 @return: The desired dictionary.
278 """
279 pos = normalizePOS(pos)
280 try:
281 d = Dictionaries[pos]
282 except KeyError:
283 raise RuntimeError, "The " + `pos` + " dictionary has not been created"
284
285 return d
286
287
288
289
290
291 @deprecated("Use dictionary.word() instead.")
294
296 """
297 Return a word with the given lexical form and pos.
298
299 @type form: C{string}
300 @param form: the sought-after word string e.g. 'dog'
301
302 @type pos: C{string}
303 @param pos: the desired part of speech. Defaults to 'noun'.
304
305 @return: the L{Word} object corresponding to form and pos, if it exists.
306 """
307 return dictionaryFor(pos).word(form)
308
309
310 @deprecated("Use dictionary.sense() instead.")
313
315 """
316 Lookup a sense by its sense number. Used by repr(sense).
317
318 @type form: C{string}
319 @param form: the sought-after word string e.g. 'dog'
320 @type pos: C{string}
321 @param pos: the desired part of speech. Defaults to 'noun'.
322 @type senseno: C{int}
323 @param senseno: the id of the desired word sense. Defaults to 0.
324 @return: the L{Synset} object corresponding to form, pos and senseno, if it exists.
325 """
326 return word(form, pos)[senseno]
327
328
329 @deprecated("Use dictionary.synset() instead.")
332
333
335 """
336 Lookup a synset by its offset.
337
338 @type pos: C{string}
339 @param pos: the desired part of speech.
340 @type offset: C{int}
341 @param offset: the offset into the relevant Wordnet dictionary file.
342 @return: the L{Synset} object extracted from the Wordnet dictionary file.
343 """
344 return dictionaryFor(pos).synset(offset)
345