Package nltk :: Package cluster :: Module api
[hide private]
[frames | no frames]

Source Code for Module nltk.cluster.api

 1  # Natural Language Toolkit: Clusterer Interfaces 
 2  # 
 3  # Copyright (C) 2001-2008 NLTK Project 
 4  # Author: Trevor Cohn <[email protected]> 
 5  # Porting: Steven Bird <[email protected]> 
 6  # URL: <http://nltk.org> 
 7  # For license information, see LICENSE.TXT 
 8   
 9  from nltk import DictionaryProbDist 
10   
class ClusterI:
    """
    Interface covering basic clustering functionality.

    Subclasses are expected to override C{cluster}, C{classify} and
    C{num_clusters}; the base versions only raise C{AssertionError}.
    The remaining methods have default implementations built on top
    of those three.
    """

    def cluster(self, vectors, assign_clusters=False):
        """
        Assigns the vectors to clusters, learning the clustering parameters
        from the data. Returns a cluster identifier for each vector.

        @param vectors: the vectors to cluster.
        @param assign_clusters: not used by this base class; its meaning
            is defined by implementing subclasses.
        @raise AssertionError: always, in this base class.
        """
        raise AssertionError()

    def classify(self, token):
        """
        Classifies the token into a cluster, setting the token's CLUSTER
        parameter to that cluster identifier.

        @raise AssertionError: always, in this base class.
        """
        raise AssertionError()

    def likelihood(self, vector, label):
        """
        Returns the likelihood (a float) of the vector having the
        corresponding cluster.

        This default is a hard assignment: 1.0 when C{classify(vector)}
        equals C{label}, and 0.0 otherwise.
        """
        if self.classify(vector) == label:
            return 1.0
        else:
            return 0.0

    def classification_probdist(self, vector):
        """
        Classifies the vector into a cluster, returning
        a probability distribution over the cluster identifiers.

        The likelihood of every name in C{cluster_names()} is computed and
        the values are divided by their total before being wrapped in a
        C{DictionaryProbDist}.

        @raise ZeroDivisionError: if the likelihoods of all clusters sum
            to zero (for example, when there are no clusters).
        """
        likelihoods = {}
        # Start from a float so the normalisation below is always a true
        # division, even if a subclass returns integer likelihoods.
        total = 0.0
        for cluster in self.cluster_names():
            likelihoods[cluster] = self.likelihood(vector, cluster)
            total += likelihoods[cluster]
        # Normalise over the keys already collected rather than asking
        # for the cluster names a second time.
        for cluster in likelihoods:
            likelihoods[cluster] /= total
        return DictionaryProbDist(likelihoods)

    def num_clusters(self):
        """
        Returns the number of clusters.

        @raise AssertionError: always, in this base class.
        """
        # Was ``AssertError``, an undefined name that produced a NameError
        # instead of the AssertionError used by the other abstract methods.
        raise AssertionError()

    def cluster_names(self):
        """
        Returns the names of the clusters.

        By default these are the integers C{0 .. num_clusters() - 1}.
        """
        return range(self.num_clusters())

    def cluster_name(self, index):
        """
        Returns the name of the cluster at index.

        By default the name is the index itself.
        """
        return index
71