1
2
3
4
5
6
7
8
9
10 """
11 A graphical tool for exploring the recursive descent parser.
12
13 The recursive descent parser maintains a tree, which records the
14 structure of the portion of the text that has been parsed. It uses
15 CFG productions to expand the fringe of the tree, and matches its
16 leaves against the text. Initially, the tree contains the start
17 symbol ("S"). It is shown in the main canvas, to the right of the
18 list of available expansions.
19
20 The parser builds up a tree structure for the text using three
21 operations:
22
23 - "expand" uses a CFG production to add children to a node on the
24 fringe of the tree.
25 - "match" compares a leaf in the tree to a text token.
26 - "backtrack" returns the tree to its state before the most recent
27 expand or match operation.
28
29 The parser maintains a list of tree locations called a "frontier" to
30 remember which nodes have not yet been expanded and which leaves have
31 not yet been matched against the text. The leftmost frontier node is
32 shown in green, and the other frontier nodes are shown in blue. The
33 parser always performs expand and match operations on the leftmost
34 element of the frontier.
35
36 You can control the parser's operation by using the "expand," "match,"
37 and "backtrack" buttons; or you can use the "step" button to let the
38 parser automatically decide which operation to apply. The parser uses
39 the following rules to decide which operation to apply:
40
41 - If the leftmost frontier element is a token, try matching it.
42 - If the leftmost frontier element is a node, try expanding it with
43 the first untried expansion.
44 - Otherwise, backtrack.
45
46 The "expand" button applies the untried expansion whose CFG production
47 is listed earliest in the grammar. To manually choose which expansion
48 to apply, click on a CFG production from the list of available
49 expansions, on the left side of the main window.
50
51 The "autostep" button will let the parser continue applying
52 applications to the tree until it reaches a complete parse. You can
53 cancel an autostep in progress at any time by clicking on the
54 "autostep" button again.
55
56 Keyboard Shortcuts::
57 [Space]\t Perform the next expand, match, or backtrack operation
58 [a]\t Step through operations until the next complete parse
59 [e]\t Perform an expand operation
60 [m]\t Perform a match operation
61 [b]\t Perform a backtrack operation
62 [Delete]\t Reset the parser
63 [g]\t Show/hide available expansions list
64 [h]\t Help
65 [Ctrl-p]\t Print
66 [q]\t Quit
67 """
68
69 import string
70 import tkFont
71 from Tkinter import *
72
73 from nltk import parse, tokenize
74
75 from nltk.draw.tree import *
76 from nltk.draw import *
77 from nltk.draw.cfg import *
78
80 """
81 A graphical tool for exploring the recursive descent parser. The tool
82 displays the parser's tree and the remaining text, and allows the
83 user to control the parser's operation. In particular, the user
84 can expand subtrees on the frontier, match tokens on the frontier
85 against the text, and backtrack. A "step" button simply steps
86 through the parsing process, performing the operations that
87 C{RecursiveDescentParser} would use.
88 """
89 - def __init__(self, grammar, sent, trace=0):
126
127
128
129
130
132
133 self._sysfont = tkFont.Font(font=Button()["font"])
134 root.option_add("*Font", self._sysfont)
135
136
137 self._size = IntVar(root)
138 self._size.set(self._sysfont.cget('size'))
139
140 self._boldfont = tkFont.Font(family='helvetica', weight='bold',
141 size=self._size.get())
142 self._font = tkFont.Font(family='helvetica',
143 size=self._size.get())
144 if self._size.get() < 0: big = self._size.get()-2
145 else: big = self._size.get()+2
146 self._bigfont = tkFont.Font(family='helvetica', weight='bold',
147 size=big)
148
150
151 self._prodframe = listframe = Frame(parent)
152 self._prodframe.pack(fill='both', side='left', padx=2)
153 self._prodlist_label = Label(self._prodframe, font=self._boldfont,
154 text='Available Expansions')
155 self._prodlist_label.pack()
156 self._prodlist = Listbox(self._prodframe, selectmode='single',
157 relief='groove', background='white',
158 foreground='#909090', font=self._font,
159 selectforeground='#004040',
160 selectbackground='#c0f0c0')
161
162 self._prodlist.pack(side='right', fill='both', expand=1)
163
164 self._productions = list(self._parser.grammar().productions())
165 for production in self._productions:
166 self._prodlist.insert('end', (' %s' % production))
167 self._prodlist.config(height=min(len(self._productions), 25))
168
169
170 if len(self._productions) > 25:
171 listscroll = Scrollbar(self._prodframe,
172 orient='vertical')
173 self._prodlist.config(yscrollcommand = listscroll.set)
174 listscroll.config(command=self._prodlist.yview)
175 listscroll.pack(side='left', fill='y')
176
177
178 self._prodlist.bind('<<ListboxSelect>>', self._prodlist_select)
179
181
182 self._top.bind('<Control-q>', self.destroy)
183 self._top.bind('<Control-x>', self.destroy)
184 self._top.bind('<Escape>', self.destroy)
185 self._top.bind('e', self.expand)
186
187
188 self._top.bind('m', self.match)
189 self._top.bind('<Alt-m>', self.match)
190 self._top.bind('<Control-m>', self.match)
191 self._top.bind('b', self.backtrack)
192 self._top.bind('<Alt-b>', self.backtrack)
193 self._top.bind('<Control-b>', self.backtrack)
194 self._top.bind('<Control-z>', self.backtrack)
195 self._top.bind('<BackSpace>', self.backtrack)
196 self._top.bind('a', self.autostep)
197
198 self._top.bind('<Control-space>', self.autostep)
199 self._top.bind('<Control-c>', self.cancel_autostep)
200 self._top.bind('<space>', self.step)
201 self._top.bind('<Delete>', self.reset)
202 self._top.bind('<Control-p>', self.postscript)
203
204
205 self._top.bind('<Control-h>', self.help)
206 self._top.bind('<F1>', self.help)
207
208
209
210 self._top.bind('<Control-g>', self.edit_grammar)
211 self._top.bind('<Control-t>', self.edit_sentence)
212
232
233
234
235
236
243
245 self._feedbackframe = feedbackframe = Frame(parent)
246 feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3)
247 self._lastoper_label = Label(feedbackframe, text='Last Operation:',
248 font=self._font)
249 self._lastoper_label.pack(side='left')
250 lastoperframe = Frame(feedbackframe, relief='sunken', border=1)
251 lastoperframe.pack(fill='x', side='right', expand=1, padx=5)
252 self._lastoper1 = Label(lastoperframe, foreground='#007070',
253 background='#f0f0f0', font=self._font)
254 self._lastoper2 = Label(lastoperframe, anchor='w', width=30,
255 foreground='#004040', background='#f0f0f0',
256 font=self._font)
257 self._lastoper1.pack(side='left')
258 self._lastoper2.pack(side='left', fill='x', expand=1)
259
261 self._cframe = CanvasFrame(parent, background='white',
262
263 closeenough=10,
264 border=2, relief='sunken')
265 self._cframe.pack(expand=1, fill='both', side='top', pady=2)
266 canvas = self._canvas = self._cframe.canvas()
267
268
269 self._tree = None
270 self._textwidgets = []
271 self._textline = None
272
274 menubar = Menu(parent)
275
276 filemenu = Menu(menubar, tearoff=0)
277 filemenu.add_command(label='Reset Parser', underline=0,
278 command=self.reset, accelerator='Del')
279 filemenu.add_command(label='Print to Postscript', underline=0,
280 command=self.postscript, accelerator='Ctrl-p')
281 filemenu.add_command(label='Exit', underline=1,
282 command=self.destroy, accelerator='Ctrl-x')
283 menubar.add_cascade(label='File', underline=0, menu=filemenu)
284
285 editmenu = Menu(menubar, tearoff=0)
286 editmenu.add_command(label='Edit Grammar', underline=5,
287 command=self.edit_grammar,
288 accelerator='Ctrl-g')
289 editmenu.add_command(label='Edit Text', underline=5,
290 command=self.edit_sentence,
291 accelerator='Ctrl-t')
292 menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
293
294 rulemenu = Menu(menubar, tearoff=0)
295 rulemenu.add_command(label='Step', underline=1,
296 command=self.step, accelerator='Space')
297 rulemenu.add_separator()
298 rulemenu.add_command(label='Match', underline=0,
299 command=self.match, accelerator='Ctrl-m')
300 rulemenu.add_command(label='Expand', underline=0,
301 command=self.expand, accelerator='Ctrl-e')
302 rulemenu.add_separator()
303 rulemenu.add_command(label='Backtrack', underline=0,
304 command=self.backtrack, accelerator='Ctrl-b')
305 menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
306
307 viewmenu = Menu(menubar, tearoff=0)
308 viewmenu.add_checkbutton(label="Show Grammar", underline=0,
309 variable=self._show_grammar,
310 command=self._toggle_grammar)
311 viewmenu.add_separator()
312 viewmenu.add_radiobutton(label='Tiny', variable=self._size,
313 underline=0, value=10, command=self.resize)
314 viewmenu.add_radiobutton(label='Small', variable=self._size,
315 underline=0, value=12, command=self.resize)
316 viewmenu.add_radiobutton(label='Medium', variable=self._size,
317 underline=0, value=14, command=self.resize)
318 viewmenu.add_radiobutton(label='Large', variable=self._size,
319 underline=0, value=18, command=self.resize)
320 viewmenu.add_radiobutton(label='Huge', variable=self._size,
321 underline=0, value=24, command=self.resize)
322 menubar.add_cascade(label='View', underline=0, menu=viewmenu)
323
324 animatemenu = Menu(menubar, tearoff=0)
325 animatemenu.add_radiobutton(label="No Animation", underline=0,
326 variable=self._animation_frames,
327 value=0)
328 animatemenu.add_radiobutton(label="Slow Animation", underline=0,
329 variable=self._animation_frames,
330 value=10, accelerator='-')
331 animatemenu.add_radiobutton(label="Normal Animation", underline=0,
332 variable=self._animation_frames,
333 value=5, accelerator='=')
334 animatemenu.add_radiobutton(label="Fast Animation", underline=0,
335 variable=self._animation_frames,
336 value=2, accelerator='+')
337 menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
338
339
340 helpmenu = Menu(menubar, tearoff=0)
341 helpmenu.add_command(label='About', underline=0,
342 command=self.about)
343 helpmenu.add_command(label='Instructions', underline=0,
344 command=self.help, accelerator='F1')
345 menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
346
347 parent.config(menu=menubar)
348
349
350
351
352
353 - def _get(self, widget, treeloc):
358
359
360
361
362
364 canvas = self._canvas
365
366
367 if self._tree is not None:
368 self._cframe.destroy_widget(self._tree)
369 for twidget in self._textwidgets:
370 self._cframe.destroy_widget(twidget)
371 if self._textline is not None:
372 self._canvas.delete(self._textline)
373
374
375 helv = ('helvetica', -self._size.get())
376 bold = ('helvetica', -self._size.get(), 'bold')
377 attribs = {'tree_color': '#000000', 'tree_width': 2,
378 'node_font': bold, 'leaf_font': helv,}
379 tree = self._parser.tree()
380 self._tree = tree_to_treesegment(canvas, tree, **attribs)
381 self._cframe.add_widget(self._tree, 30, 5)
382
383
384 helv = ('helvetica', -self._size.get())
385 bottom = y = self._cframe.scrollregion()[3]
386 self._textwidgets = [TextWidget(canvas, word, font=self._font)
387 for word in self._sent]
388 for twidget in self._textwidgets:
389 self._cframe.add_widget(twidget, 0, 0)
390 twidget.move(0, bottom-twidget.bbox()[3]-5)
391 y = min(y, twidget.bbox()[1])
392
393
394 self._textline = canvas.create_line(-5000, y-5, 5000, y-5, dash='.')
395
396
397 self._highlight_nodes()
398 self._highlight_prodlist()
399
400
401 self._position_text()
402
403
409
411
412 bold = ('helvetica', -self._size.get(), 'bold')
413 for treeloc in self._parser.frontier()[:1]:
414 self._get(self._tree, treeloc)['color'] = '#20a050'
415 self._get(self._tree, treeloc)['font'] = bold
416 for treeloc in self._parser.frontier()[1:]:
417 self._get(self._tree, treeloc)['color'] = '#008080'
418
437
438 - def _position_text(self):
439
440 numwords = len(self._sent)
441 num_matched = numwords - len(self._parser.remaining_text())
442 leaves = self._tree_leaves()[:num_matched]
443 xmax = self._tree.bbox()[0]
444 for i in range(0, len(leaves)):
445 widget = self._textwidgets[i]
446 leaf = leaves[i]
447 widget['color'] = '#006040'
448 leaf['color'] = '#006040'
449 widget.move(leaf.bbox()[0] - widget.bbox()[0], 0)
450 xmax = widget.bbox()[2] + 10
451
452
453 for i in range(len(leaves), numwords):
454 widget = self._textwidgets[i]
455 widget['color'] = '#a0a0a0'
456 widget.move(xmax - widget.bbox()[0], 0)
457 xmax = widget.bbox()[2] + 10
458
459
460 if self._parser.currently_complete():
461 for twidget in self._textwidgets:
462 twidget['color'] = '#00a000'
463
464
465 for i in range(0, len(leaves)):
466 widget = self._textwidgets[i]
467 leaf = leaves[i]
468 dy = widget.bbox()[1] - leaf.bbox()[3] - 10.0
469 dy = max(dy, leaf.parent().node().bbox()[3] - leaf.bbox()[3] + 10)
470 leaf.move(0, dy)
471
480
481
482
483
484
486 self._autostep = 0
487 if self._top is None: return
488 self._top.destroy()
489 self._top = None
490
492 self._autostep = 0
493 self._parser.initialize(self._sent)
494 self._lastoper1['text'] = 'Reset Demo'
495 self._lastoper2['text'] = ''
496 self._redraw()
497
499 if self._animation_frames.get() == 0:
500 self._animation_frames.set(2)
501 if self._autostep:
502 self._autostep = 0
503 else:
504 self._autostep = 1
505 self._step()
506
508
509 self._autostep = 0
510
511
512 - def step(self, *e): self._autostep = 0; self._step()
513 - def match(self, *e): self._autostep = 0; self._match()
516
518 if self._animating_lock: return
519
520
521 if self._expand(): pass
522 elif self._parser.untried_match() and self._match(): pass
523 elif self._backtrack(): pass
524 else:
525 self._lastoper1['text'] = 'Finished'
526 self._lastoper2['text'] = ''
527 self._autostep = 0
528
529
530 if self._parser.currently_complete():
531 self._autostep = 0
532 self._lastoper2['text'] += ' [COMPLETE PARSE]'
533
535 if self._animating_lock: return
536 old_frontier = self._parser.frontier()
537 rv = self._parser.expand()
538 if rv is not None:
539 self._lastoper1['text'] = 'Expand:'
540 self._lastoper2['text'] = rv
541 self._prodlist.selection_clear(0, 'end')
542 index = self._productions.index(rv)
543 self._prodlist.selection_set(index)
544 self._animate_expand(old_frontier[0])
545 return 1
546 else:
547 self._lastoper1['text'] = 'Expand:'
548 self._lastoper2['text'] = '(all expansions tried)'
549 return 0
550
552 if self._animating_lock: return
553 old_frontier = self._parser.frontier()
554 rv = self._parser.match()
555 if rv is not None:
556 self._lastoper1['text'] = 'Match:'
557 self._lastoper2['text'] = rv
558 self._animate_match(old_frontier[0])
559 return 1
560 else:
561 self._lastoper1['text'] = 'Match:'
562 self._lastoper2['text'] = '(failed)'
563 return 0
564
566 if self._animating_lock: return
567 if self._parser.backtrack():
568 elt = self._parser.tree()
569 for i in self._parser.frontier()[0]:
570 elt = elt[i]
571 self._lastoper1['text'] = 'Backtrack'
572 self._lastoper2['text'] = ''
573 if isinstance(elt, Tree):
574 self._animate_backtrack(self._parser.frontier()[0])
575 else:
576 self._animate_match_backtrack(self._parser.frontier()[0])
577 return 1
578 else:
579 self._autostep = 0
580 self._lastoper1['text'] = 'Finished'
581 self._lastoper2['text'] = ''
582 return 0
583
585 ABOUT = ("NLTK Recursive Descent Parser Demo\n"+
586 "Written by Edward Loper")
587 TITLE = 'About: Recursive Descent Parser Demo'
588 try:
589 from tkMessageBox import Message
590 Message(message=ABOUT, title=TITLE).show()
591 except:
592 ShowText(self._top, TITLE, ABOUT)
593
594 - def help(self, *e):
595 self._autostep = 0
596
597 try:
598 ShowText(self._top, 'Help: Recursive Descent Parser Demo',
599 (__doc__).strip(), width=75, font='fixed')
600 except:
601 ShowText(self._top, 'Help: Recursive Descent Parser Demo',
602 (__doc__).strip(), width=75)
603
604 - def postscript(self, *e):
605 self._autostep = 0
606 self._cframe.print_to_file()
607
608 - def mainloop(self, *args, **kwargs):
609 """
610 Enter the Tkinter mainloop. This function must be called if
611 this demo is created from a non-interactive program (e.g.
612 from a secript); otherwise, the demo will close as soon as
613 the script completes.
614 """
615 if in_idle(): return
616 self._top.mainloop(*args, **kwargs)
617
626
627
628
629
630
632 if self._show_grammar.get():
633 self._prodframe.pack(fill='both', side='left', padx=2,
634 after=self._feedbackframe)
635 self._lastoper1['text'] = 'Show Grammar'
636 else:
637 self._prodframe.pack_forget()
638 self._lastoper1['text'] = 'Hide Grammar'
639 self._lastoper2['text'] = ''
640
641
642
643
644
645
646
647
648
649
650
651
671
672
673
674
675
677 oldwidget = self._get(self._tree, treeloc)
678 oldtree = oldwidget.parent()
679 top = not isinstance(oldtree.parent(), TreeSegmentWidget)
680
681 tree = self._parser.tree()
682 for i in treeloc:
683 tree = tree[i]
684
685 widget = tree_to_treesegment(self._canvas, tree,
686 node_font=self._boldfont,
687 leaf_color='white',
688 tree_width=2, tree_color='white',
689 node_color='white',
690 leaf_font=self._font)
691 widget.node()['color'] = '#20a050'
692
693 (oldx, oldy) = oldtree.node().bbox()[:2]
694 (newx, newy) = widget.node().bbox()[:2]
695 widget.move(oldx-newx, oldy-newy)
696
697 if top:
698 self._cframe.add_widget(widget, 0, 5)
699 widget.move(30-widget.node().bbox()[0], 0)
700 self._tree = widget
701 else:
702 oldtree.parent().replace_child(oldtree, widget)
703
704
705
706 if widget.subtrees():
707 dx = (oldx + widget.node().width()/2 -
708 widget.subtrees()[0].bbox()[0]/2 -
709 widget.subtrees()[0].bbox()[2]/2)
710 for subtree in widget.subtrees(): subtree.move(dx, 0)
711
712 self._makeroom(widget)
713
714 if top:
715 self._cframe.destroy_widget(oldtree)
716 else:
717 oldtree.destroy()
718
719 colors = ['gray%d' % (10*int(10*x/self._animation_frames.get()))
720 for x in range(self._animation_frames.get(),0,-1)]
721
722
723 dy = widget.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
724 if dy > 0:
725 for twidget in self._textwidgets: twidget.move(0, dy)
726 self._canvas.move(self._textline, 0, dy)
727
728 self._animate_expand_frame(widget, colors)
729
753
755 if len(colors) > 0:
756 self._animating_lock = 1
757 widget['color'] = colors[0]
758 for subtree in widget.subtrees():
759 if isinstance(subtree, TreeSegmentWidget):
760 subtree.node()['color'] = colors[0]
761 else:
762 subtree['color'] = colors[0]
763 self._top.after(50, self._animate_expand_frame,
764 widget, colors[1:])
765 else:
766 widget['color'] = 'black'
767 for subtree in widget.subtrees():
768 if isinstance(subtree, TreeSegmentWidget):
769 subtree.node()['color'] = 'black'
770 else:
771 subtree['color'] = 'black'
772 self._redraw_quick()
773 widget.node()['color'] = 'black'
774 self._animating_lock = 0
775 if self._autostep: self._step()
776
792
806
814
816 widget = self._get(self._tree, treeloc)
817
818 dy = ((self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) /
819 max(1, self._animation_frames.get()))
820 self._animate_match_frame(self._animation_frames.get(), widget, dy)
821
823 if frame > 0:
824 self._animating_lock = 1
825 widget.move(0, dy)
826 self._top.after(10, self._animate_match_frame,
827 frame-1, widget, dy)
828 else:
829 widget['color'] = '#006040'
830 self._redraw_quick()
831 self._animating_lock = 0
832 if self._autostep: self._step()
833
845
848
855
861
863 self._sent = sentence.split()
864 self.reset()
865
867 """
868 Create a recursive descent parser demo, using a simple grammar and
869 text.
870 """
871 from nltk import cfg
872 grammar = cfg.parse_cfg("""
873 # Grammatical productions.
874 S -> NP VP
875 NP -> Det N PP | Det N
876 VP -> V NP PP | V NP | V
877 PP -> P NP
878 # Lexical productions.
879 NP -> 'I'
880 Det -> 'the' | 'a'
881 N -> 'man' | 'park' | 'dog' | 'telescope'
882 V -> 'ate' | 'saw'
883 P -> 'in' | 'under' | 'with'
884 """)
885
886 sent = 'the dog saw a man in the park'.split()
887
888 RecursiveDescentDemo(grammar, sent).mainloop()
889
890 if __name__ == '__main__': demo()
891