1
2
3
4
5
6
7
8
9 import os
10
11 from nltk.sem import root_semrep, Expression
12 from nltk import parse
13 from nltk.data import show_cfg
14
15 from nltk.inference import MaceCommand, spacer, get_prover
16
17 """
18 Module for incrementally developing simple discourses, and checking for semantic ambiguity,
19 consistency and informativeness.
20
21 Many of the ideas are based on the CURT family of programs of Blackburn and Bos
22 (see U{http://homepages.inf.ed.ac.uk/jbos/comsem/book1.html}).
23
24 Consistency checking is carried out by using the L{mace} module to call the Mace4 model builder.
25 Informativeness checking is carried out with a call to C{get_prover()} from
26 the L{inference} module.
27
28 C{DiscourseTester} is a constructor for discourses.
29 The basic data structure is a list of sentences, stored as C{self._sentences}. Each sentence in the list
30 is assigned a I{sentence ID} (C{sid}) of the form C{s}I{i}. For example::
31
32 s0: A boxer walks
33 s1: Every boxer chases a girl
34
35 Each sentence can be ambiguous between a number of readings, each of which receives a
36 I{reading ID} (C{rid}) of the form C{s}I{i}-C{r}I{j}. For example::
37
38 s0 readings:
39 ------------------------------
40 s0-r1: some x.((boxer x) and (walk x))
41 s0-r0: some x.((boxerdog x) and (walk x))
42
43 A I{thread} is a list of readings, represented
44 as a list of C{rid}s. Each thread receives a I{thread ID} (C{tid}) of the form C{d}I{i}.
45 For example::
46
47 d0: ['s0-r0', 's1-r0']
48
49 The set of all threads for a discourse is the Cartesian product of the sets of readings of each sentence in the discourse.
50 (This is not intended to scale beyond very short discourses!) The method L{readings(filter=True)} will only show
51 those threads which are consistent (taking into account any background assumptions).
52 """
53
54
55
57 """
58 Check properties of an ongoing discourse.
59 """
def __init__(self, input, gramfile=None, background=None):
    """
    Initialize a C{DiscourseTester}.

    The sentence list and the background list are copied, so that later
    in-place updates of C{self._input} and C{self._background} do not
    modify the lists owned by the caller.

    @parameter input: the discourse sentences
    @type input: C{list} of C{str}
    @parameter gramfile: name of file where grammar can be loaded;
        if C{None}, C{'grammars/sem4.fcfg'} is used
    @type gramfile: C{str}
    @parameter background: Formulas which express background assumptions
    @type background: C{list} of L{logic.Expression}.
    """
    # Private copy: C{add_sentence} appends to this list in place.
    self._input = list(input)
    # Sentence IDs are 's0', 's1', ... in input order.
    self._sentences = dict(('s%s' % i, sent) for i, sent in enumerate(self._input))
    self._models = None
    self._readings = {}
    if gramfile is None:
        gramfile = 'grammars/sem4.fcfg'
    self._gramfile = gramfile
    self._threads = {}
    self._filtered_threads = {}
    self._parser = parse.load_earley(self._gramfile)
    if background is not None:
        for e in background:
            assert isinstance(e, Expression)
        # Private copy: background formulas are appended to this list later.
        self._background = list(background)
    else:
        self._background = []
88
89
90
91
92
94 """
95 Display the list of sentences in the current discourse.
96 """
97 for id in sorted(self._sentences.keys()):
98 print "%s: %s" % (id, self._sentences[id])
99
def add_sentence(self, sentence, informchk=False, consistchk=False):
    """
    Append a sentence to the current discourse.

    Updates C{self._input} and C{self._sentences}.

    @parameter sentence: An input sentence
    @type sentence: C{str}
    @parameter informchk: if C{True}, then before the sentence is added,
        rebuild the readings and, for every thread, ask the prover whether
        each reading of the sentence follows from that thread's readings
        plus the background; a message is printed for each reading that
        is provable (i.e. not informative).
    @parameter consistchk: if C{True}, then after the sentence is added,
        rebuild the readings and call C{self.models(show=False)}.
    """
    if informchk:
        # Bring readings and threads up to date before testing entailment.
        self.readings(quiet=True)
        for thread_id in sorted(self._threads):
            premises = [formula for (_, formula) in self.expand_threads(thread_id)]
            premises.extend(self._background)
            for candidate in self._get_readings(sentence):
                prover = get_prover(goal=candidate, assumptions=premises, prover_name='Prover9')
                if not prover.prove():
                    continue
                print("Sentence '%s' under reading '%s':" % (sentence, str(candidate)))
                print("Not informative relative to thread '%s'" % thread_id)

    # Record the sentence and renumber the sentence IDs.
    self._input.append(sentence)
    self._sentences = dict(('s%s' % idx, text) for idx, text in enumerate(self._input))

    if consistchk:
        self.readings(quiet=True)
        self.models(show=False)
130
132 """
133 Remove a sentence from the current discourse.
134
135 Updates C{self._input}, C{self._sentences} and C{self._readings}.
136 @parameter sentence: An input sentence
137 @type sentence: C{str}
138 @parameter quiet: If C{False}, report on the updated list of sentences.
139 """
140 try:
141 self._input.remove(sentence)
142 except ValueError:
143 print "Retraction failed. The sentence '%s' is not part of the current discourse:" % sentence
144 self.sentences()
145 return None
146 self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(self._input)])
147 self.readings(quiet=True)
148 if not quiet:
149 print "Current sentences are "
150 self.sentences()
151
153 """
154 Print out the grammar in use for parsing input sentences
155 """
156 show_cfg(self._gramfile)
157
158
159
160
161
171
173 """
174 Use C{self._sentences} to construct a value for C{self._readings}.
175 """
176
177 self._readings = {}
178 for sid in self._sentences:
179 readings = self._get_readings(self._sentences[sid])
180 self._readings[sid] = dict([("%s-r%s" % (sid, rid), reading)
181 for rid, reading in enumerate(readings)])
182
184 """
185 Use C{self._readings} to construct a value for C{self._threads}
186 and use the model builder to construct a value for C{self._filtered_threads}
187 """
188 thread_list = [[]]
189 for sid in sorted(self._readings.keys()):
190 thread_list = self.multiply(thread_list, sorted(self._readings[sid].keys()))
191 self._threads = dict([("d%s" % tid, thread) for tid, thread in enumerate(thread_list)])
192
193 self._filtered_threads = {}
194
195 for (tid, thread) in self._threads.items():
196 if (tid, True) in self._check_consistency(self._threads):
197 self._filtered_threads[tid] = thread
198
199
201 """
202 Print out the readings for the discourse (or a single sentence).
203 """
204 if sentence is not None:
205 print "The sentence '%s' has these readings:" % sentence
206 for r in [str(reading) for reading in (self._get_readings(sentence))]:
207 print " %s" % r
208 else:
209 for sid in sorted(self._readings.keys()):
210 print
211 print '%s readings:' % sid
212 print '-' * 30
213 for rid in sorted(self._readings[sid]):
214 lf = self._readings[sid][rid]
215
216 print "%s: %s" % (rid, lf)
217
219 """
220 Print out the value of C{self._threads} or C{self._filtered_threads}
221 """
222 if filter:
223 threads = self._filtered_threads
224 else:
225 threads = self._threads
226 for tid in sorted(threads.keys()):
227 print "%s:" % tid, self._threads[tid]
228
229
def readings(self, sentence=None, threaded=False, quiet=False, filter=False):
    """
    Rebuild the readings and threads of the discourse, then optionally display them.

    @parameter sentence: test just this sentence
    @type sentence: C{str}
    @parameter threaded: if C{True}, print out each thread ID and the corresponding thread.
    @parameter quiet: if C{True}, rebuild the readings and threads but print nothing.
    @parameter filter: if C{True}, only print out consistent thread IDs and threads
        (this implies C{threaded}).
    """
    # Always rebuild, so that whatever is shown reflects the current sentences.
    self._construct_readings()
    self._construct_threads()

    if quiet:
        return
    # A filtered display is necessarily a threaded one.
    if threaded or filter:
        self._show_threads(filter=filter)
    else:
        self._show_readings(sentence=sentence)
249
251 """
252 Given a thread ID, find the list of L{logic.Expression}s corresponding to the reading IDs in that thread.
253
254 @parameter thread_id: thread ID
255 @type thread_id: C{str}
256 @parameter threads: a mapping from thread IDs to lists of reading IDs
257 @type threads: C{dict}
258 @return: A list of pairs (C{rid}, I{reading}) where I{reading} is the L{logic.Expression} associated with a reading ID
259 @rtype: C{list} of C{tuple}
260 """
261 if threads is None:
262 threads = self._threads
263 return [(rid, self._readings[sid][rid]) for rid in threads[thread_id] for sid in rid.split('-')[:1]]
264
265
266
267
268
269
292
def models(self, thread_id=None, show=True, quiet=True):
    """
    Report, for each current discourse thread, whether a model was found.

    The readings and threads are rebuilt first. The consistency check
    itself is delegated to C{self._check_consistency} (according to the
    module documentation, this relies on the Mace4 model builder).

    @parameter thread_id: thread ID; if C{None}, all threads are checked
    @type thread_id: C{str}
    @parameter show: If C{True}, display the model that has been found.
    @parameter quiet: passed through unchanged to C{self._check_consistency}.
    """
    self._construct_readings()
    self._construct_threads()
    if thread_id is not None:
        # Restrict the check to the single requested thread.
        threads = {thread_id: self._threads[thread_id]}
    else:
        threads = self._threads

    for (tid, modelfound) in self._check_consistency(threads, show=show, quiet=quiet):
        idlist = list(threads[tid])
        if modelfound:
            heading = "Consistent discourse: %s %s:"
        else:
            heading = "Inconsistent discourse %s %s:"
        # Render every reading before printing anything for this thread.
        rendered = [(rid, str(formula)) for (rid, formula) in self.expand_threads(tid)]
        print(heading % (tid, idlist))
        for (rid, text) in rendered:
            print(" %s: %s" % (rid, text))
        print("")
321
323 """
324 Add a list of background assumptions for reasoning about the discourse.
325
326 When called, this method also updates the discourse model's set of readings and threads.
327 @parameter background: Formulas which contain background information
328 @type background: C{list} of L{logic.Expression}.
329 """
330 for (count, e) in enumerate(background):
331 assert isinstance(e, Expression)
332 if not quiet:
333 print "Adding assumption %s to background" % count
334 self._background.append(e)
335
336
337 self._construct_readings()
338 self._construct_threads()
339
341 """
342 Show the current background assumptions.
343 """
344 for e in self._background:
345 print str(e)
346
347
348
349
350
351 @staticmethod
353 """
354 Multiply every thread in C{discourse} by every reading in C{readings}.
355
356 Given discourse = [['A'], ['B']], readings = ['a', 'b', 'c'] , returns
357 [['A', 'a'], ['A', 'b'], ['A', 'c'], ['B', 'a'], ['B', 'b'], ['B', 'c']]
358
359 @parameter discourse: the current list of readings
360 @type discourse: C{list} of C{list}s
361 @parameter readings: an additional list of readings
362 @type readings: C{list} of C{logic.Expression}s
363 @rtype: A C{list} of C{list}s
364 """
365 result = []
366 for sublist in discourse:
367 for r in readings:
368 new = []
369 new += sublist
370 new.append(r)
371 result.append(new)
372 return result
373
374
375
376
377
378
380 """
381 Temporarily duplicated from L{nltk.sem.util}.
382 Convert a file of First Order Formulas into a list of C{Expression}s.
383
384 @parameter s: the contents of the file
385 @type s: C{str}
386 @return: a list of parsed formulas.
387 @rtype: C{list} of L{Expression}
388 """
389 from nltk.sem import LogicParser
390 statements = []
391 lp = LogicParser()
392 for linenum, line in enumerate(s.splitlines()):
393 line = line.strip()
394 if line.startswith('#') or line=='': continue
395 try:
396 statements.append(lp.parse(line))
397 except Error:
398 raise ValueError, 'Unable to parse line %s: %s' % (linenum, line)
399 return statements
400
401
402
403
404
406 """
407 Illustrate the various methods of C{DiscourseTester}
408 """
409 dt = DiscourseTester(['A boxer walks', 'Every boxer chases a girl'])
410 dt.models()
411 print
412
413 print
414 dt.sentences()
415 print
416 dt.readings()
417 print
418 dt.readings(threaded=True)
419 print
420 dt.models('d1')
421 dt.add_sentence('John is a boxer')
422 print
423 dt.sentences()
424 print
425 dt.readings(threaded=True)
426 print
427 dt = DiscourseTester(['A student dances', 'Every student is a person'])
428 print
429 dt.add_sentence('No person dances', consistchk=True)
430 print
431 dt.readings()
432 print
433 dt.retract_sentence('No person dances', quiet=False)
434 print
435 dt.models()
436 print
437 dt.readings('A person dances')
438 print
439 dt.add_sentence('A person dances', informchk=True)
440 dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'])
441 dt.readings(filter=True)
442 import nltk.data
443 world = nltk.data.load('/grammars/world.fol')
444 print
445 dt.add_background(world, quiet=True)
446 dt.background()
447 print
448 dt.readings(filter=True)
449 print
450 dt.models()
451
452
453
# Run the demonstration only when this module is executed as a script.
if __name__ == "__main__":
    discourse_demo()
456