1
2
3
4
5
6
7
8
9 """
10 Interfaces for labeling tokens with category labels (or X{class
11 labels}).
12
13 L{ClassifierI} is a standard interface for X{single-category
14 classification}, in which:
15
16 - The set of categories is known.
17 - The number of categories is finite.
18 - Each text belongs to exactly one category.
19
20 L{MultiClassifierI} is a standard interface for X{multi-category
21 classification}, in which:
22
23 - The set of categories is known.
24 - The number of categories is finite.
25 - Each text belongs to zero or more categories.
26 """
27 from nltk.internals import deprecated, overridden
28
29
30
31
32
34 """
35 A processing interface for labeling tokens with a single category
36 label (or X{class}). Labels are typically C{string}s or
37 C{integer}s, but can be any immutable type. The set of labels
38 that the classifier chooses from must be fixed and finite.
39
40 Subclasses must define:
41 - L{labels()}
42 - either L{classify()} or L{batch_classify()} (or both)
43
44 Subclasses may define:
45 - either L{prob_classify()} or L{batch_prob_classify()} (or both)
46 """
48 """
49 @return: the list of category labels used by this classifier.
50 @rtype: C{list} of (immutable)
51 """
52 raise NotImplementedError()
53
55 """
56 @return: the most appropriate label for the given featureset.
57 @rtype: label
58 """
59 if overridden(self.batch_classify):
60 return self.batch_classify([featureset])[0]
61 else:
62 raise NotImplementedError()
63
65 """
66 @return: a probability distribution over labels for the given
67 featureset.
68 @rtype: L{ProbDistI <nltk.probability.ProbDistI>}
69 """
70 if overridden(self.batch_prob_classify):
71 return self.batch_prob_classify([featureset])[0]
72 else:
73 raise NotImplementedError()
74
76 """
77 Apply L{self.classify()} to each element of C{featuresets}. I.e.:
78
79 >>> return [self.classify(fs) for fs in featuresets]
80
81 @rtype: C{list} of I{label}
82 """
83 return [self.classify(fs) for fs in featuresets]
84
86 """
87 Apply L{self.prob_classify()} to each element of C{featuresets}. I.e.:
88
89 >>> return [self.prob_classify(fs) for fs in featuresets]
90
91 @rtype: C{list} of L{ProbDistI <nltk.probability.ProbDistI>}
92 """
93 return [self.prob_classify(fs) for fs in featuresets]
94
95
96 @deprecated("Use .batch_prob_classify() instead.")
99 @deprecated("Use .prob_classify() instead.")
102
103
105 """
106 A processing interface for labeling tokens with zero or more
107 category labels (or X{labels}). Labels are typically C{string}s
108 or C{integer}s, but can be any immutable type. The set of labels
109 that the multi-classifier chooses from must be fixed and finite.
110
111 Subclasses must define:
112 - L{labels()}
113 - either L{classify()} or L{batch_classify()} (or both)
114
115 Subclasses may define:
116 - either L{prob_classify()} or L{batch_prob_classify()} (or both)
117 """
119 """
120 @return: the list of category labels used by this classifier.
121 @rtype: C{list} of (immutable)
122 """
123 raise NotImplementedError()
124
126 """
127 @return: the most appropriate set of labels for the given featureset.
128 @rtype: C{set} of I{label}
129 """
130 if overridden(self.batch_classify):
131 return self.batch_classify([featureset])[0]
132 else:
133 raise NotImplementedError()
134
136 """
137 @return: a probability distribution over sets of labels for the
138 given featureset.
139 @rtype: L{ProbDistI <nltk.probability.ProbDistI>}
140 """
141 if overridden(self.batch_prob_classify):
142 return self.batch_prob_classify([featureset])[0]
143 else:
144 raise NotImplementedError()
145
147 """
148 Apply L{self.classify()} to each element of C{featuresets}. I.e.:
149
150 >>> return [self.classify(fs) for fs in featuresets]
151
152 @rtype: C{list} of (C{set} of I{label})
153 """
154 return [self.classify(fs) for fs in featuresets]
155
157 """
158 Apply L{self.prob_classify()} to each element of C{featuresets}. I.e.:
159
160 >>> return [self.prob_classify(fs) for fs in featuresets]
161
162 @rtype: C{list} of L{ProbDistI <nltk.probability.ProbDistI>}
163 """
164 return [self.prob_classify(fs) for fs in featuresets]
165
166
167 @deprecated("Use .batch_prob_classify() instead.")
170 @deprecated("Use .prob_classify() instead.")
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215