"""
Utility functions for batch-processing sentences: parsing and
extraction of the semantic representation of the root node of the
syntax tree, followed by evaluation of the semantic representation in
a first-order model.
"""

import evaluate
import re
import nltk
from logic import LogicParser, ParseException

def text_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    @parameter inputs: sentences to be parsed
    @type inputs: C{list} of C{str}
    @parameter grammar: feature-based grammar to use in parsing
    @rtype: C{dict}
    @return: a mapping from input sentences to a list of L{Tree}s
    """
    parses = {}
    cp = nltk.parse.FeatureEarleyChartParser(grammar, trace=trace)
    for sent in inputs:
        tokens = sent.split()
        syntrees = cp.nbest_parse(tokens)
        parses[sent] = syntrees
    return parses
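# Usage sketch (not part of the original module; assumes the demo grammar
# 'grammars/sem2.fcfg' used by demo() below is available to nltk.data.load):
#
#     gram = nltk.data.load('grammars/sem2.fcfg')
#     parses = text_parse(['John sees Mary'], gram)
#     for tree in parses['John sees Mary']:
#         print tree
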
def _semrep(node, beta_reduce=True):
    """
    Find the semantic representation at a given tree node.

    @parameter node: node of a parse L{Tree}
    @rtype: L{logic.Expression}
    """
    assert isinstance(node, nltk.cfg.FeatStructNonterminal)
    try:
        semrep = node['sem']
        if beta_reduce:
            semrep = semrep.simplify()
        return semrep
    except KeyError:
        print "Node has no 'sem' feature specification"
        raise

def root_semrep(syntree, beta_reduce=True, start='S'):
    """
    Find the semantic representation at the root of a tree.

    @parameter syntree: a parse L{Tree}
    @parameter beta_reduce: if C{True}, carry out beta reduction on the
        logical forms that are returned
    @return: the semantic representation at the root of a L{Tree}
    @rtype: L{logic.Expression}
    """
    return _semrep(syntree.node, beta_reduce=beta_reduce)
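# Usage sketch (illustrative only; reuses the hypothetical 'gram' loaded in the
# text_parse sketch above): read the logical form off the root of a parse tree.
#
#     trees = text_parse(['John sees Mary'], gram)['John sees Mary']
#     if trees:
#         print root_semrep(trees[0])
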
def text_interpret(inputs, grammar, beta_reduce=True, start='S', syntrace=0):
    """
    Add the semantic representation to each syntactic parse tree
    of each input sentence.

    @parameter inputs: a list of sentences
    @parameter grammar: a feature-based grammar
    @return: a mapping from sentences to lists of pairs (parse-tree, semantic-representation)
    @rtype: C{dict}
    """
    parses = text_parse(inputs, grammar, trace=syntrace)
    semreps = {}
    for sent in inputs:
        syntrees = parses[sent]
        syn_sem = \
            [(syn, root_semrep(syn, beta_reduce=beta_reduce, start=start))
             for syn in syntrees]
        semreps[sent] = syn_sem
    return semreps
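# Usage sketch (illustrative only; 'gram' as in the sketches above): map each
# sentence to a list of (parse-tree, logical-form) pairs.
#
#     semreps = text_interpret(['every girl chases a dog'], gram)
#     for (tree, sem) in semreps['every girl chases a dog']:
#         print sem
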
def text_evaluate(inputs, grammar, model, assignment, semtrace=0):
    """
    Add the truth-in-a-model value to each semantic representation
    for each syntactic parse of each input sentence.

    @return: a mapping from sentences to lists of triples (parse-tree, semantic-representation, evaluation-in-model)
    @rtype: C{dict}
    """
    g = assignment
    m = model
    semreps = text_interpret(inputs, grammar)
    evaluations = {}
    for sent in inputs:
        syn_sem_val = \
            [(syn, sem, m.evaluate(str(sem), g, trace=semtrace))
             for (syn, sem) in semreps[sent]]
        evaluations[sent] = syn_sem_val
    return evaluations
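# Usage sketch (illustrative only): with the default model m0 and assignment g0
# built by demo_model0() below, parsing, interpretation and evaluation can be
# run in one call.
#
#     demo_model0()
#     gram = nltk.data.load('grammars/sem2.fcfg')
#     results = text_evaluate(['John sees Mary'], gram, m0, g0)
#     for (tree, sem, value) in results['John sees Mary']:
#         print sem, value
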

_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*')
_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*')
_TUPLES_RE = re.compile(r"""\s*
                            (\([^)]+\))   # tuple-expression
                            \s*""", re.VERBOSE)

def parse_valuation_line(s):
    """
    Parse a line in a valuation file.

    Lines are expected to be of the form::

        noosa => n
        girl => {g1, g2}
        chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}

    @parameter s: input line
    @type s: C{str}
    @return: a pair (symbol, value)
    @rtype: C{tuple}
    """
    pieces = _VAL_SPLIT_RE.split(s)
    symbol = pieces[0]
    value = pieces[1]
    # a value enclosed in braces denotes a set of individuals or of tuples
    if value.startswith('{'):
        value = value[1:-1]
        tuple_strings = _TUPLES_RE.findall(value)
        # are the set elements tuples?
        if tuple_strings:
            set_elements = []
            for ts in tuple_strings:
                ts = ts[1:-1]
                element = tuple(_ELEMENT_SPLIT_RE.split(ts))
                set_elements.append(element)
        else:
            set_elements = _ELEMENT_SPLIT_RE.split(value)
        value = set(set_elements)
    return symbol, value
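# Example (illustrative only): the docstring lines above parse as follows.
#
#     parse_valuation_line('noosa => n')
#         -> ('noosa', 'n')
#     parse_valuation_line('girl => {g1, g2}')
#         -> ('girl', set(['g1', 'g2']))
#     parse_valuation_line('chase => {(b1, g1), (b2, g1)}')
#         -> ('chase', set([('b1', 'g1'), ('b2', 'g1')]))
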
def parse_valuation(s):
    """
    Convert a valuation file into a valuation.

    @parameter s: the contents of a valuation file
    @type s: C{str}
    @return: a L{nltk.sem} valuation
    @rtype: L{Valuation}
    """
    statements = []
    for linenum, line in enumerate(s.splitlines()):
        line = line.strip()
        if line.startswith('#') or line == '': continue
        try: statements.append(parse_valuation_line(line))
        except ValueError:
            raise ValueError, 'Unable to parse line %s: %s' % (linenum, line)
    val = evaluate.Valuation(statements)
    return val
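# Usage sketch (illustrative only): a small valuation given in the file format
# documented above, here as an inline string.
#
#     _val_str = """
#     john => b1
#     girl => {g1, g2}
#     chase => {(b1, g1), (g1, d1)}
#     """
#     val = parse_valuation(_val_str)
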
def parse_fol(s):
    """
    Convert a file of first-order formulas into a list of L{Expression}s.

    @parameter s: the contents of the file
    @type s: C{str}
    @return: a list of parsed formulas
    @rtype: C{list} of L{Expression}
    """
    statements = []
    lp = LogicParser()
    for linenum, line in enumerate(s.splitlines()):
        line = line.strip()
        if line.startswith('#') or line == '': continue
        try:
            statements.append(lp.parse(line))
        except ParseException:
            raise ValueError, 'Unable to parse line %s: %s' % (linenum, line)
    return statements
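# Usage sketch (illustrative only; the exact formula syntax accepted depends on
# the logic module's LogicParser):
#
#     _fol_str = """
#     # closed formulas, one per line
#     (see john mary)
#     all x.((girl x) implies (walk x))
#     """
#     formulas = parse_fol(_fol_str)
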
def demo_model0():
    global m0, g0
    val = evaluate.Valuation()
    # a valuation of non-logical constants: individual constants map to
    # domain elements, unary predicates to sets, binary relations to sets of pairs
    v = [('john', 'b1'),
         ('mary', 'g1'),
         ('suzie', 'g2'),
         ('fido', 'd1'),
         ('tess', 'd2'),
         ('noosa', 'n'),
         ('girl', set(['g1', 'g2'])),
         ('boy', set(['b1', 'b2'])),
         ('dog', set(['d1', 'd2'])),
         ('bark', set(['d1', 'd2'])),
         ('walk', set(['b1', 'g2', 'd1'])),
         ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
         ('see', set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')])),
         ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
         ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')]))
        ]
    # read the pairs into the valuation
    val.read(v)
    # the domain of discourse is determined by the valuation
    dom = val.domain
    # build a model and a variable assignment over that domain
    m0 = evaluate.Model(dom, val)
    g0 = evaluate.Assignment(dom)

def read_sents(filename):
    """
    Read a file of test sentences, one per line, skipping blank lines
    and comment lines; used by demo() when the '-s' option is given.
    """
    sents = [l.rstrip() for l in open(filename).readlines()]
    return [l for l in sents if len(l) > 0 and not l.startswith('#')]

def demo():
    import sys
    from optparse import OptionParser
    description = \
    """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(evaluate=True, beta=True, syntrace=0,
                      semtrace=0, demo='default', grammar='', sentences='')

    opts.add_option("-d", "--demo", dest="demo",
                    help="choose demo D; omit this for the default demo, or specify 'chat80'",
                    metavar="D")
    opts.add_option("-g", "--gram", dest="grammar",
                    help="read in grammar G", metavar="G")
    opts.add_option("-m", "--model", dest="model",
                    help="import model M (omit '.py' suffix)", metavar="M")
    opts.add_option("-s", "--sentences", dest="sentences",
                    help="read in a file of test sentences S", metavar="S")
    opts.add_option("-e", "--no-eval", action="store_false", dest="evaluate",
                    help="just do a syntactic analysis")
    opts.add_option("-b", "--no-beta-reduction", action="store_false",
                    dest="beta", help="don't carry out beta-reduction")
    opts.add_option("-t", "--syntrace", action="count", dest="syntrace",
                    help="set syntactic tracing on; requires '-e' option")
    opts.add_option("-T", "--semtrace", action="count", dest="semtrace",
                    help="set semantic tracing on")

    (options, args) = opts.parse_args()

    SPACER = '-' * 30

    demo_model0()

    sents = [
        'Fido sees a boy with Mary',
        'John sees Mary',
        'every girl chases a dog',
        'every boy chases a girl',
        'John walks with a girl in Noosa',
        'who walks']

    gramfile = 'grammars/sem2.fcfg'

    if options.sentences:
        sentsfile = options.sentences
        # read the test sentences from the file instead of the built-in list
        sents = None
    if options.grammar:
        gramfile = options.grammar
    if options.model:
        exec "import %s as model" % options.model

    if sents is None:
        sents = read_sents(sentsfile)

    gram = nltk.data.load(gramfile)

    # use the default model and assignment set up by demo_model0()
    model = m0
    g = g0

    if options.evaluate:
        evaluations = \
            text_evaluate(sents, gram, model, g, semtrace=options.semtrace)
    else:
        semreps = \
            text_interpret(sents, gram, beta_reduce=options.beta,
                           syntrace=options.syntrace)

    for sent in sents:
        n = 1
        print '\nSentence: %s' % sent
        print SPACER
        if options.evaluate:
            for (syntree, semrep, value) in evaluations[sent]:
                if isinstance(value, dict):
                    value = set(value.keys())
                print '%d: %s' % (n, semrep.infixify())
                print value
                n += 1
        else:
            for (syntree, semrep) in semreps[sent]:
                print '%d: %s' % (n, semrep)
                n += 1


if __name__ == "__main__":
    demo()