Package nltk :: Package parse :: Module api
[hide private]
[frames | no frames]

Source Code for Module nltk.parse.api

  1  # Natural Language Toolkit: Parser API 
  2  # 
  3  # Copyright (C) 2001-2008 NLTK Project 
  4  # Author: Steven Bird <[email protected]> 
  5  #         Edward Loper <[email protected]> 
  6  # URL: <http://nltk.org> 
  7  # For license information, see LICENSE.TXT 
  8  # 
  9   
 10  import itertools 
 11   
 12  from nltk.internals import deprecated, Deprecated, overridden 
 13   
class ParserI(object):
    """
    A processing class for deriving trees that represent possible
    structures for a sequence of tokens.  These tree structures are
    known as X{parses}.  Typically, parsers are used to derive syntax
    trees for sentences.  But parsers can also be used to derive other
    kinds of tree structure, such as morphological trees and discourse
    structures.

    Subclasses must define:
      - at least one of: L{parse()}, L{nbest_parse()}, L{iter_parse()},
        L{batch_parse()}, L{batch_nbest_parse()}, L{batch_iter_parse()}.

    Subclasses may define:
      - L{grammar()}
      - either L{prob_parse()} or L{batch_prob_parse()} (or both)

    The default implementations below are written in terms of one
    another: each one asks C{overridden()} (imported from
    C{nltk.internals}) which of its sibling methods the subclass has
    supplied, and delegates to that method.
    """

    def grammar(self):
        """
        @return: The grammar used by this parser.
        @raise NotImplementedError: If the subclass does not provide
            a grammar.
        """
        raise NotImplementedError()

    def parse(self, sent):
        """
        @return: A parse tree that represents the structure of the
        given sentence, or C{None} if no parse tree is found.  If
        multiple parses are found, then return the best parse.

        @param sent: The sentence to be parsed
        @type sent: L{list} of L{string}
        @rtype: L{Tree}
        """
        if overridden(self.batch_parse):
            return self.batch_parse([sent])[0]
        else:
            # Fall back on the n-best interface, asking for one tree.
            trees = self.nbest_parse(sent, 1)
            if trees:
                return trees[0]
            else:
                return None

    def nbest_parse(self, sent, n=None):
        """
        @return: A list of parse trees that represent possible
        structures for the given sentence.  When possible, this list is
        sorted from most likely to least likely.  If C{n} is
        specified, then the returned list will contain at most C{n}
        parse trees.

        @param sent: The sentence to be parsed
        @type sent: L{list} of L{string}
        @param n: The maximum number of trees to return.
        @type n: C{int}
        @rtype: C{list} of L{Tree}
        """
        if overridden(self.batch_nbest_parse):
            return self.batch_nbest_parse([sent], n)[0]
        # The second test used to call the misspelled name C{overriden},
        # which raised NameError whenever parse() was not overridden
        # (e.g. for a subclass that only defines iter_parse()).
        elif overridden(self.parse) or overridden(self.batch_parse):
            # Only a single best parse is available on this path.
            tree = self.parse(sent)
            if tree:
                return [tree]
            else:
                return []
        else:
            # islice() with n=None consumes the whole iterator.
            return list(itertools.islice(self.iter_parse(sent), n))

    def iter_parse(self, sent):
        """
        @return: An iterator that generates parse trees that represent
        possible structures for the given sentence.  When possible,
        this list is sorted from most likely to least likely.

        @param sent: The sentence to be parsed
        @type sent: L{list} of L{string}
        @rtype: C{iterator} of L{Tree}
        @raise NotImplementedError: If the subclass overrides none of
            the parsing methods that this default can delegate to.
        """
        if overridden(self.batch_iter_parse):
            return self.batch_iter_parse([sent])[0]
        elif (overridden(self.nbest_parse) or
              overridden(self.batch_nbest_parse)):
            return iter(self.nbest_parse(sent))
        # Same C{overriden} -> C{overridden} spelling fix as in
        # nbest_parse(); the old name raised NameError here.
        elif overridden(self.parse) or overridden(self.batch_parse):
            tree = self.parse(sent)
            if tree:
                return iter([tree])
            else:
                return iter([])
        else:
            raise NotImplementedError()

    def prob_parse(self, sent):
        """
        @return: A probability distribution over the possible parse
        trees for the given sentence.  If there are no possible parse
        trees for the given sentence, return a probability distribution
        that assigns a probability of 1.0 to C{None}.

        @param sent: The sentence to be parsed
        @type sent: L{list} of L{string}
        @rtype: L{ProbDistI} of L{Tree}
        @raise NotImplementedError: If the subclass overrides neither
            this method nor L{batch_prob_parse()}.
        """
        if overridden(self.batch_prob_parse):
            return self.batch_prob_parse([sent])[0]
        else:
            raise NotImplementedError()

    def batch_parse(self, sents):
        """
        Apply L{self.parse()} to each element of C{sents}.  I.e.::

            return [self.parse(sent) for sent in sents]

        @rtype: C{list} of L{Tree}
        """
        return [self.parse(sent) for sent in sents]

    def batch_nbest_parse(self, sents, n=None):
        """
        Apply L{self.nbest_parse()} to each element of C{sents}.  I.e.::

            return [self.nbest_parse(sent, n) for sent in sents]

        @rtype: C{list} of C{list} of L{Tree}
        """
        return [self.nbest_parse(sent, n) for sent in sents]

    def batch_iter_parse(self, sents):
        """
        Apply L{self.iter_parse()} to each element of C{sents}.  I.e.::

            return [self.iter_parse(sent) for sent in sents]

        @rtype: C{list} of C{iterator} of L{Tree}
        """
        return [self.iter_parse(sent) for sent in sents]

    def batch_prob_parse(self, sents):
        """
        Apply L{self.prob_parse()} to each element of C{sents}.  I.e.::

            return [self.prob_parse(sent) for sent in sents]

        @rtype: C{list} of L{ProbDistI} of L{Tree}
        """
        return [self.prob_parse(sent) for sent in sents]

    #////////////////////////////////////////////////////////////
    #{ Deprecated
    @deprecated("Use parse() instead.")
    def get_parse(self, sent):
        return self.parse(sent)

    @deprecated("Use nbest_parse() instead.")
    def get_parse_list(self, sent):
        return self.nbest_parse(sent)

    @deprecated("Use prob_parse() instead.")
    def get_parse_prob(self, sent):
        return self.prob_parse(sent)

    @deprecated("Use prob_parse() instead.")
    def get_parse_dict(self, sent):
        return self.prob_parse(sent)

    @deprecated("No longer supported.")
    def batch_test(self, filename):
        # Reads C{filename} line by line: blank lines are skipped, lines
        # starting with '#' are echoed, and every other line is handed
        # to nbest_parse() and its parses printed.
        f = open(filename)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('#'):
                    print(line)
                    continue
                # Single-argument parenthesised prints produce the same
                # output under the print statement and print function.
                print("Sentence: %s" % line)
                parses = self.nbest_parse(line)
                print("%d parses." % len(parses))
                for tree in parses:
                    print(tree)
        finally:
            # The file handle was previously never closed.
            f.close()
182 #} 183 #//////////////////////////////////////////////////////////// 184 185 ###################################################################### 186 #{ Deprecated
class ParseI(ParserI, Deprecated):
    """Use ParserI instead."""
    # NOTE(review): the wording of the docstring above is kept verbatim;
    # the Deprecated base presumably surfaces it to users -- confirm
    # against nltk.internals before rephrasing.
class AbstractParser(Deprecated, ParserI):
    """Use ParserI instead."""

    @deprecated("Use nltk.cfg.Grammar.check_coverage() instead.")
    def _check_coverage(self, tokens):
        # Thin forwarder: hands the tokens to the check_coverage()
        # method of the object stored in self._grammar.
        grammar = self._grammar
        grammar.check_coverage(tokens)
194 #} 195 ###################################################################### 196