Package nltk :: Package classify :: Module api
[hide private]
[frames] | no frames]

Source Code for Module nltk.classify.api

  1  # Natural Language Toolkit: Classifier Interface 
  2  # 
  3  # Copyright (C) 2001-2008 NLTK Project 
  4  # Author: Edward Loper <[email protected]> 
  5  #         Steven Bird <[email protected]> (minor additions) 
  6  # URL: <http://nltk.org> 
  7  # For license information, see LICENSE.TXT 
  8   
  9  """ 
 10  Interfaces for labeling tokens with category labels (or X{class 
 11  labels}). 
 12   
 13  L{ClassifierI} is a standard interface for X{single-category 
 14  classification}, in which: 
 15   
 16      - The set of categories is known. 
 17      - The number of categories is finite. 
 18      - Each text belongs to exactly one category. 
 19   
 20  L{MultiClassifierI} is a standard interface for X{multi-category 
 21  classification}, in which: 
 22   
 23      - The set of categories is known. 
 24      - The number of categories is finite. 
 25      - Each text belongs to zero or more categories. 
 26  """ 
 27  from nltk.internals import deprecated, overridden 
 28   
 29  ##////////////////////////////////////////////////////// 
 30  #{ Classification Interfaces 
 31  ##////////////////////////////////////////////////////// 
 32   
class ClassifierI(object):
    """
    Interface for single-category classifiers: objects that assign one
    category label (or X{class}) to each token.  A label may be any
    immutable value, although C{string}s and C{integer}s are the usual
    choice.  The collection of labels a classifier picks from must be
    fixed and finite.

    Required in subclasses:
      - L{labels()}
      - L{classify()} or L{batch_classify()} (defining both is fine)

    Optional in subclasses:
      - L{prob_classify()} or L{batch_prob_classify()} (defining both
        is fine)
    """
    def labels(self):
        """
        Abstract; this base implementation always raises
        C{NotImplementedError}.

        @return: the list of category labels used by this classifier.
        @rtype: C{list} of (immutable)
        """
        raise NotImplementedError()

    def classify(self, featureset):
        """
        Default implementation: if C{overridden()} reports that
        L{batch_classify()} has been overridden, classify a one-element
        batch and return its only result; otherwise raise
        C{NotImplementedError}.

        @return: the most appropriate label for the given featureset.
        @rtype: label
        """
        # Guard first: without a batch implementation there is nothing
        # to delegate to.
        if not overridden(self.batch_classify):
            raise NotImplementedError()
        batch_result = self.batch_classify([featureset])
        return batch_result[0]

    def prob_classify(self, featureset):
        """
        Default implementation: if C{overridden()} reports that
        L{batch_prob_classify()} has been overridden, run it on a
        one-element batch and return its only result; otherwise raise
        C{NotImplementedError}.

        @return: a probability distribution over labels for the given
            featureset.
        @rtype: L{ProbDistI <nltk.probability.ProbDistI>}
        """
        if not overridden(self.batch_prob_classify):
            raise NotImplementedError()
        batch_result = self.batch_prob_classify([featureset])
        return batch_result[0]

    def batch_classify(self, featuresets):
        """
        Apply L{self.classify()} to each element of C{featuresets}.
        Equivalent to::

            [self.classify(fs) for fs in featuresets]

        @rtype: C{list} of I{label}
        """
        chosen_labels = [self.classify(item) for item in featuresets]
        return chosen_labels

    def batch_prob_classify(self, featuresets):
        """
        Apply L{self.prob_classify()} to each element of C{featuresets}.
        Equivalent to::

            [self.prob_classify(fs) for fs in featuresets]

        @rtype: C{list} of L{ProbDistI <nltk.probability.ProbDistI>}
        """
        distributions = [self.prob_classify(item) for item in featuresets]
        return distributions

    #{ Deprecated
    # Old spelling of batch_prob_classify(); forwards straight to it.
    @deprecated("Use .batch_prob_classify() instead.")
    def batch_probdist(self, featuresets):
        return self.batch_prob_classify(featuresets)

    # Old spelling of prob_classify(); forwards straight to it.
    @deprecated("Use .prob_classify() instead.")
    def probdist(self, featureset):
        return self.prob_classify(featureset)
102 #} 103
class MultiClassifierI(object):
    """
    Interface for multi-category classifiers: objects that assign zero
    or more category labels (or X{labels}) to each token.  A label may
    be any immutable value, although C{string}s and C{integer}s are the
    usual choice.  The collection of labels a multi-classifier picks
    from must be fixed and finite.

    Required in subclasses:
      - L{labels()}
      - L{classify()} or L{batch_classify()} (defining both is fine)

    Optional in subclasses:
      - L{prob_classify()} or L{batch_prob_classify()} (defining both
        is fine)
    """
    def labels(self):
        """
        Abstract; this base implementation always raises
        C{NotImplementedError}.

        @return: the list of category labels used by this classifier.
        @rtype: C{list} of (immutable)
        """
        raise NotImplementedError()

    def classify(self, featureset):
        """
        Default implementation: if C{overridden()} reports that
        L{batch_classify()} has been overridden, classify a one-element
        batch and return its only result; otherwise raise
        C{NotImplementedError}.

        @return: the most appropriate set of labels for the given
            featureset.
        @rtype: C{set} of I{label}
        """
        # Guard first: without a batch implementation there is nothing
        # to delegate to.
        if not overridden(self.batch_classify):
            raise NotImplementedError()
        batch_result = self.batch_classify([featureset])
        return batch_result[0]

    def prob_classify(self, featureset):
        """
        Default implementation: if C{overridden()} reports that
        L{batch_prob_classify()} has been overridden, run it on a
        one-element batch and return its only result; otherwise raise
        C{NotImplementedError}.

        @return: a probability distribution over sets of labels for the
            given featureset.
        @rtype: L{ProbDistI <nltk.probability.ProbDistI>}
        """
        if not overridden(self.batch_prob_classify):
            raise NotImplementedError()
        batch_result = self.batch_prob_classify([featureset])
        return batch_result[0]

    def batch_classify(self, featuresets):
        """
        Apply L{self.classify()} to each element of C{featuresets}.
        Equivalent to::

            [self.classify(fs) for fs in featuresets]

        @rtype: C{list} of (C{set} of I{label})
        """
        label_sets = [self.classify(item) for item in featuresets]
        return label_sets

    def batch_prob_classify(self, featuresets):
        """
        Apply L{self.prob_classify()} to each element of C{featuresets}.
        Equivalent to::

            [self.prob_classify(fs) for fs in featuresets]

        @rtype: C{list} of L{ProbDistI <nltk.probability.ProbDistI>}
        """
        distributions = [self.prob_classify(item) for item in featuresets]
        return distributions

    #{ Deprecated
    # Old spelling of batch_prob_classify(); forwards straight to it.
    @deprecated("Use .batch_prob_classify() instead.")
    def batch_probdist(self, featuresets):
        return self.batch_prob_classify(featuresets)

    # Old spelling of prob_classify(); forwards straight to it.
    @deprecated("Use .prob_classify() instead.")
    def probdist(self, featureset):
        return self.prob_classify(featureset)
    #}

# # [XX] IN PROGRESS:
# class SequenceClassifierI(object):
#     """
#     A processing interface for labeling sequences of tokens with a
#     single category label (or X{class}).  Labels are typically
#     C{string}s or C{integer}s, but can be any immutable type.  The set
#     of labels that the classifier chooses from must be fixed and
#     finite.
#     """
#     def labels(self):
#         """
#         @return: the list of category labels used by this classifier.
#         @rtype: C{list} of (immutable)
#         """
#         raise NotImplementedError()

#     def prob_classify(self, featureset):
#         """
#         Return a probability distribution over labels for the given
#         featureset.

#         If C{featureset} is a list of featuresets, then return a
#         corresponding list containing the probability distribution
#         over labels for each of the given featuresets, where the
#         M{i}th element of this list is the probability distribution
#         for the M{i}th element of C{featuresets}.
#         """
#         raise NotImplementedError()

#     def classify(self, featureset):
#         """
#         Return the most appropriate label for the given featureset.

#         If C{featureset} is a list of featuresets, then return a
#         corresponding list containing the most appropriate label for
#         each of the given featuresets, where the M{i}th element of
#         this list is the most appropriate label for the M{i}th element
#         of C{featuresets}.
#         """
#         raise NotImplementedError()