Package nltk :: Package sem :: Module util
[hide private]
[frames] | [no frames]

Source Code for Module nltk.sem.util

  1  # Natural Language Toolkit: Semantic Interpretation 
  2  # 
  3  # Author: Ewan Klein <[email protected]> 
  4  # URL: <http://nltk.org> 
  5  # For license information, see LICENSE.TXT 
  6   
  7  """ 
  8  Utility functions for batch-processing sentences: parsing and 
  9  extraction of the semantic representation of the root node of the 
 10  syntax tree, followed by evaluation of the semantic representation in 
 11  a first-order model. 
 12  """ 
 13   
 14  import evaluate 
 15  import re 
 16  import nltk 
 17  from logic import LogicParser, ParseException 
 18   
 19   
 20  ############################################################## 
 21  ## Utility functions for connecting parse output to semantics 
 22  ############################################################## 
 23   
def text_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    @parameter inputs: sentences to be parsed
    @type inputs: C{list} of C{str}
    @parameter grammar: feature-based grammar to use in parsing
    @parameter trace: tracing level passed through to the chart parser
    @rtype: C{dict}
    @return: a mapping from input sentences to a list of L{Tree}s
    """
    parser = nltk.parse.FeatureEarleyChartParser(grammar, trace=trace)
    # Tokenize each sentence on whitespace and collect every parse for it.
    return dict((sent, parser.nbest_parse(sent.split())) for sent in inputs)
41
def _semrep(node, beta_reduce=True):
    """
    Find the semantic representation at a given tree node.

    @parameter node: node of a parse L{Tree}
    @parameter beta_reduce: if C{True}, beta-reduce the expression before
        returning it
    @rtype: L{logic.Expression}
    @raise KeyError: if the node has no C{sem} feature
    """
    assert isinstance(node, nltk.cfg.FeatStructNonterminal)
    # Keep the try body minimal: previously simplify() also ran inside the
    # try, so a KeyError raised during beta reduction would have been
    # misreported as a missing 'sem' feature.
    try:
        semrep = node['sem']
    except KeyError:
        print("Node has no 'sem' feature specification")
        raise
    if beta_reduce:
        semrep = semrep.simplify()
    return semrep
58
def root_semrep(syntree, beta_reduce=True, start='S'):
    """
    Find the semantic representation at the root of a tree.

    @parameter syntree: a parse L{Tree}
    @parameter beta_reduce: if C{True}, carry out beta reduction on the logical forms that are returned
    @return: the semantic representation at the root of a L{Tree}
    @rtype: L{logic.Expression}
    """
    # Delegate to the node-level helper, applied to the root node.
    root_node = syntree.node
    return _semrep(root_node, beta_reduce=beta_reduce)
69
def text_interpret(inputs, grammar, beta_reduce=True, start='S', syntrace=0):
    """
    Add the semantic representation to each syntactic parse tree
    of each input sentence.

    @parameter inputs: a list of sentences
    @parameter grammar: a feature-based grammar
    @return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
    @rtype: C{dict}
    """
    parses = text_parse(inputs, grammar, trace=syntrace)
    semreps = {}
    for sent in inputs:
        # Pair each parse tree with the semantics found at its root.
        pairs = []
        for tree in parses[sent]:
            pairs.append((tree, root_semrep(tree, beta_reduce=beta_reduce, start=start)))
        semreps[sent] = pairs
    return semreps
89
def text_evaluate(inputs, grammar, model, assignment, semtrace=0):
    """
    Add the truth-in-a-model value to each semantic representation
    for each syntactic parse of each input sentences.

    @return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
    @rtype: C{dict}
    """
    semreps = text_interpret(inputs, grammar)
    evaluations = {}
    for sent in inputs:
        triples = []
        for (syn, sem) in semreps[sent]:
            # Evaluate the logical form's string rendering in the model
            # under the given assignment.
            value = model.evaluate(str(sem), assignment, trace=semtrace)
            triples.append((syn, sem, value))
        evaluations[sent] = triples
    return evaluations
##########################################
# REs used by the parse_valuation function
##########################################
# Splits "symbol => value" on the arrow (one or more '=' followed by '>'),
# absorbing surrounding whitespace.
_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*')
# Splits comma-separated set/tuple elements, absorbing surrounding whitespace.
_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*')
# Matches one parenthesized tuple such as "(b1, g1)"; used to detect whether
# a set's elements are tuples rather than individuals.
_TUPLES_RE = re.compile(r"""\s*
            (\([^)]+\)) # tuple-expression
            \s*""", re.VERBOSE)
def parse_valuation_line(s):
    """
    Parse a line in a valuation file.

    Lines are expected to be of the form::

        noosa => n
        girl => {g1, g2}
        chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}

    @parameter s: input line
    @type s: C{str}
    @return: a pair (symbol, value)
    @rtype: C{tuple}
    @raise ValueError: if the line is not of the form 'symbol => value'
    """
    pieces = _VAL_SPLIT_RE.split(s)
    # A well-formed line splits into exactly a symbol and a value.  A line
    # with no '=>' previously raised IndexError, which parse_valuation()
    # (catching only ValueError) could not report cleanly.
    if len(pieces) != 2:
        raise ValueError("Not a valid valuation line: %r" % s)
    symbol, value = pieces
    # check whether the value is meant to be a set
    if value.startswith('{'):
        value = value[1:-1]
        tuple_strings = _TUPLES_RE.findall(value)
        # are the set elements tuples?
        if tuple_strings:
            set_elements = []
            for ts in tuple_strings:
                # Strip the surrounding parentheses and split on commas.
                ts = ts[1:-1]
                set_elements.append(tuple(_ELEMENT_SPLIT_RE.split(ts)))
        else:
            set_elements = _ELEMENT_SPLIT_RE.split(value)
        value = set(set_elements)
    return symbol, value
150
def parse_valuation(s):
    """
    Convert a valuation file into a valuation.

    @parameter s: the contents of a valuation file
    @type s: C{str}
    @return: a L{nltk.sem} valuation
    @rtype: L{Valuation}
    @raise ValueError: if a non-blank, non-comment line cannot be parsed
    """
    statements = []
    for linenum, line in enumerate(s.splitlines()):
        line = line.strip()
        # Skip comment lines and blank lines.
        if line.startswith('#') or line == '':
            continue
        try:
            statements.append(parse_valuation_line(line))
        except ValueError:
            # Use raise E(arg): the old 'raise E, arg' form is Python-2-only.
            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
    val = evaluate.Valuation(statements)
    return val
169
def parse_fol(s):
    """
    Convert a file of First Order Formulas into a list of {Expression}s.

    @parameter s: the contents of the file
    @type s: C{str}
    @return: a list of parsed formulas.
    @rtype: C{list} of L{Expression}
    @raise ValueError: if a non-blank, non-comment line cannot be parsed
    """
    statements = []
    lp = LogicParser()
    for linenum, line in enumerate(s.splitlines()):
        line = line.strip()
        # Skip comment lines and blank lines.
        if line.startswith('#') or line == '':
            continue
        try:
            statements.append(lp.parse(line))
        except ParseException:
            # Use raise E(arg): the old 'raise E, arg' form is Python-2-only.
            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
    return statements
189
def demo_model0():
    """
    Build the demo model C{m0} and variable assignment C{g0} as module
    globals, for use by L{demo}.
    """
    global m0, g0
    # Valuation data for the non-logical constants: individuals map to
    # domain entities, unary predicates to sets of entities, and binary
    # predicates to sets of pairs.
    pairs = [
        ('john', 'b1'),
        ('mary', 'g1'),
        ('suzie', 'g2'),
        ('fido', 'd1'),
        ('tess', 'd2'),
        ('noosa', 'n'),
        ('girl', set(['g1', 'g2'])),
        ('boy', set(['b1', 'b2'])),
        ('dog', set(['d1', 'd2'])),
        ('bark', set(['d1', 'd2'])),
        ('walk', set(['b1', 'g2', 'd1'])),
        ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
        ('see', set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')])),
        ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
        ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')])),
    ]
    val = evaluate.Valuation()
    val.read(pairs)
    # The model's domain is the one induced from the valuation.
    m0 = evaluate.Model(val.domain, val)
    # Start from an assignment over the same domain.
    g0 = evaluate.Assignment(val.domain)
218 219
def read_sents(file):
    """
    Read test sentences from a file, one sentence per line.

    Blank lines and lines whose first character is '#' are discarded.

    @parameter file: path of the sentence file
    @type file: C{str}
    @rtype: C{list} of C{str}
    """
    # Use a context manager so the handle is closed promptly; the original
    # left the file open until garbage collection.
    with open(file) as f:
        sents = [l.rstrip() for l in f]
    # Get rid of blank lines and comment lines.
    return [l for l in sents if l and not l.startswith('#')]
226
def demo():
    """
    Command-line demo: parse a batch of sentences and, unless '-e' is
    given, evaluate them in a first-order model.  Run with '-h' for the
    available options.
    """
    from optparse import OptionParser
    description = \
    """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(evaluate=True, beta=True, syntrace=0,
                      semtrace=0, demo='default', grammar='', sentences='')

    opts.add_option("-d", "--demo", dest="demo",
                    help="choose demo D; omit this for the default demo, or specify 'chat80'", metavar="D")
    opts.add_option("-g", "--gram", dest="grammar",
                    help="read in grammar G", metavar="G")
    opts.add_option("-m", "--model", dest="model",
                    help="import model M (omit '.py' suffix)", metavar="M")
    opts.add_option("-s", "--sentences", dest="sentences",
                    help="read in a file of test sentences S", metavar="S")
    opts.add_option("-e", "--no-eval", action="store_false", dest="evaluate",
                    help="just do a syntactic analysis")
    opts.add_option("-b", "--no-beta-reduction", action="store_false",
                    dest="beta", help="don't carry out beta-reduction")
    opts.add_option("-t", "--syntrace", action="count", dest="syntrace",
                    help="set syntactic tracing on; requires '-e' option")
    opts.add_option("-T", "--semtrace", action="count", dest="semtrace",
                    help="set semantic tracing on")

    (options, args) = opts.parse_args()

    SPACER = '-' * 30

    demo_model0()

    # Default test sentences; replaced when '-s' is given.
    sents = [
        'Fido sees a boy with Mary',
        'John sees Mary',
        'every girl chases a dog',
        'every boy chases a girl',
        'John walks with a girl in Noosa',
        'who walks']

    gramfile = 'grammars/sem2.fcfg'

    # BUG FIX: 'sents' was only replaced when it was None, which could
    # never happen (it is assigned above), so the '-s' option was
    # silently ignored and 'sentsfile' could be referenced unbound.
    if options.sentences:
        sents = read_sents(options.sentences)
    if options.grammar:
        gramfile = options.grammar

    gram = nltk.data.load(gramfile)

    # Default model and assignment; an explicit '-m' option overrides them.
    # BUG FIX: 'model = m0' previously ran unconditionally *after* the
    # import, clobbering the model requested with '-m'.
    model = m0
    g = g0
    if options.model:
        exec("import %s as model" % options.model)

    if options.evaluate:
        evaluations = \
            text_evaluate(sents, gram, model, g, semtrace=options.semtrace)
    else:
        semreps = \
            text_interpret(sents, gram, beta_reduce=options.beta, syntrace=options.syntrace)

    for sent in sents:
        n = 1
        print('\nSentence: %s' % sent)
        print(SPACER)
        if options.evaluate:
            for (syntree, semrep, value) in evaluations[sent]:
                if isinstance(value, dict):
                    # Show only the satisfiers, not the full mapping.
                    value = set(value.keys())
                print('%d: %s' % (n, semrep.infixify()))
                print(value)
                n += 1
        else:
            for (syntree, semrep) in semreps[sent]:
                print('%d: %s' % (n, semrep))
                n += 1
# Run the demo when this module is executed as a script.
if __name__ == "__main__":
    demo()