Package nltk :: Package corpus :: Package reader :: Module wordlist
[hide private]
[frames | no frames]

Source Code for Module nltk.corpus.reader.wordlist

 1  # Natural Language Toolkit: Word List Corpus Reader 
 2  # 
 3  # Copyright (C) 2001-2008 NLTK Project 
 4  # Author: Steven Bird <[email protected]> 
 5  #         Edward Loper <[email protected]> 
 6  # URL: <http://nltk.org> 
 7  # For license information, see LICENSE.TXT 
 8   
 9  from nltk.corpus.reader.util import * 
10  from nltk.corpus.reader.api import * 
11  from nltk.internals import deprecated 
12  from nltk.tokenize import line_tokenize 
13   
class WordListCorpusReader(CorpusReader):
    """
    List of words, one per line.  Blank lines are ignored.
    """

    def words(self, files=None):
        """
        Return the contents of the given file(s) split into items by
        C{line_tokenize}.

        @param files: A single file identifier, a list of file
            identifiers, or C{None}; passed unchanged to L{raw()}.
        @return: The result of applying C{line_tokenize} to the raw
            text returned by L{raw()}.
        """
        return line_tokenize(self.raw(files))

    def raw(self, files=None):
        """
        Return the unprocessed contents of the given file(s).

        @param files: A single file identifier (a string), a list of
            file identifiers, or C{None} to use every file listed in
            C{self._files}.
        @return: The contents read from each file, in the order given,
            combined by C{concat}.
        """
        if files is None:
            files = self._files
        elif isinstance(files, basestring):
            # A lone file name is treated as a one-element list.
            files = [files]

        contents = []
        for f in files:
            stream = self.open(f)
            # Close each stream explicitly as soon as it has been read,
            # even if read() raises, instead of leaving the handle open
            # until it is garbage collected.
            # NOTE(review): assumes the object returned by self.open()
            # provides close() as well as read() -- confirm against
            # CorpusReader.open().
            try:
                contents.append(stream.read())
            finally:
                stream.close()
        return concat(contents)

    #{ Deprecated since 0.8
    @deprecated("Use .raw() or .words() instead.")
    def read(self, items=None, format='listed'):
        """
        Deprecated dispatcher; use L{raw()} or L{words()} instead.

        @param items: Forwarded as the C{files} argument of the
            selected method.
        @param format: C{'raw'} to delegate to L{raw()}, or
            C{'listed'} (the default) to delegate to L{words()}.
        @raise ValueError: If C{format} is neither C{'raw'} nor
            C{'listed'}.
        """
        if format == 'raw':
            return self.raw(items)
        if format == 'listed':
            return self.words(items)
        raise ValueError('bad format %r' % format)

    @deprecated("Use .words() instead.")
    def listed(self, items=None):
        """
        Deprecated alias for L{words()}.

        @param items: Forwarded as the C{files} argument of L{words()}.
        """
        return self.words(items)
    #}