1
2
3
4
5
6
7
8
9 """
10 Interface for tagging each token in a sentence with supplementary
11 information, such as its part of speech.
12 """
13
14 from nltk.internals import overridden
15
17 """
18 A processing interface for assigning a tag to each token in a list.
19 Tags are case-sensitive strings that identify some property of each
20 token, such as its part of speech or its sense.
21
22 Some taggers require specific types for their tokens. This is
23 generally indicated by the use of a sub-interface to C{TaggerI}.
24 For example, I{featureset taggers}, which are subclassed from
25 L{FeaturesetTaggerI}, require that each token be a I{featureset}.
26
27 Subclasses must define:
28 - either L{tag()} or L{batch_tag()} (or both)
29 """
def tag(self, tokens):
    """
    Determine the most appropriate tag sequence for the given
    token sequence, and return a corresponding list of tagged
    tokens.  Each tagged token is encoded as a tuple
    C{(token, tag)}.

    This default implementation only delegates: when
    C{overridden(self.batch_tag)} is true, C{tokens} is wrapped in
    a one-element list, handed to C{self.batch_tag}, and the first
    element of that result is returned.

    @param tokens: The token sequence to tag.
    @rtype: C{list} of C{(token, tag)}
    @raise NotImplementedError: If C{overridden(self.batch_tag)}
        is false.
    """
    # Guard clause: there is nothing to delegate to unless batch_tag
    # has been overridden.
    if not overridden(self.batch_tag):
        raise NotImplementedError()

    # Tag a single-sentence batch and unwrap the only result.
    tagged_batch = self.batch_tag([tokens])
    return tagged_batch[0]
42
44 """
45 Apply L{self.tag()} to each element of C{sentences}. I.e.::
46
47     return [self.tag(tokens) for tokens in sentences]
48 """
49 return [self.tag(tokens) for tokens in sentences]
50
52 """
53 A tagger that requires tokens to be I{featuresets}. A featureset
54 is a dictionary that maps from I{feature names} to I{feature
55 values}. See L{nltk.classify} for more information about features
56 and featuresets.
57 """
58
59
78