8 """
9 Tokenizer Interface
10 """
11 from nltk.internals import overridden
12
class TokenizerI(object):
    """
    A processing interface for I{tokenizing} a string, or dividing it
    into a list of substrings.

    Subclasses must define:
      - either L{tokenize()} or L{batch_tokenize()} (or both)
    """
    def tokenize(self, s):
        """
        Divide the given string into a list of substrings.

        @param s: the string to tokenize
        @type s: C{str}
        @return: C{list} of C{str}
        @raise NotImplementedError: if neither L{tokenize()} nor
            L{batch_tokenize()} is overridden by the subclass.
        """
        # If the subclass chose to implement batch_tokenize() instead,
        # delegate to it so that defining either method is sufficient.
        if overridden(self.batch_tokenize):
            return self.batch_tokenize([s])[0]
        else:
            raise NotImplementedError()

    def batch_tokenize(self, strings):
        """
        Apply L{self.tokenize()} to each element of C{strings}.  I.e.:

            >>> return [self.tokenize(s) for s in strings]

        @param strings: the strings to tokenize
        @type strings: C{list} of C{str}
        @rtype: C{list} of C{list} of C{str}
        """
        return [self.tokenize(s) for s in strings]
41