nltk.draw.cfg

1 # Natural Language Toolkit: CFG visualization 2 # 3 # Copyright (C) 2001-2008 NLTK Project 4 # Author: Edward Loper <[email protected]> 5 # URL: <http://nltk.org> 6 # For license information, see LICENSE.TXT 7 # 8 # $Id: cfg.py 6265 2008-07-26 09:25:03Z stevenbird $ 9 10 """ 11 Visualization tools for CFGs. 12 """ 13 14 import re 15 16 17 """ 18 Idea for a nice demo: 19 - 3 panes: grammar, treelet, working area 20 - grammar is a list of productions 21 - when you select a production, the treelet that it licenses appears 22 in the treelet area 23 - the working area has the text on the bottom, and S at top. When 24 you select a production, it shows (ghosted) the locations where 25 that production's treelet could be attached to either the text 26 or the tree rooted at S. 27 - the user can drag the treelet onto one of those (or click on them?) 28 - the user can delete pieces of the tree from the working area 29 (right click?) 30 - connecting top to bottom? drag one NP onto another? 31 32 +-------------------------------------------------------------+ 33 | S -> NP VP | S | 34 |[NP -> Det N ]| / \ | 35 | ... | NP VP | 36 | N -> 'dog' | | 37 | N -> 'cat' | | 38 | ... 
| | 39 +--------------+ | 40 | NP | Det N | 41 | / \ | | | | 42 | Det N | the cat saw the dog | 43 | | | 44 +--------------+----------------------------------------------+ 45 46 Operations: 47 - connect a new treelet -- drag or click shadow 48 - delete a treelet -- right click 49 - if only connected to top, delete everything below 50 - if only connected to bottom, delete everything above 51 - connect top & bottom -- drag a leaf to a root or a root to a leaf 52 - disconnect top & bottom -- right click 53 - if connected to top & bottom, then disconnect 54 """ 55 56 from nltk.draw import * 57 from nltk.cfg import * 58 from Tkinter import * 59 from nltk.tree import * 60 from nltk.draw.tree import * 61 62 63 ###################################################################### 64 # Production List 65 ###################################################################### 66

class ProductionList(ColorizedList):
    """
    A C{ColorizedList} whose items are CFG productions.  Each
    production is rendered as its left-hand side, an arrow symbol,
    and the elements of its right-hand side.
    """
    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """
        Configure the C{terminal}, C{arrow} and C{nonterminal} tags
        on C{textwidget}.  C{options} is not used here.
        """
        bold = ('helvetica', -12, 'bold')
        textwidget.tag_config('terminal', foreground='#006000')
        textwidget.tag_config('arrow', font='symbol', underline='0')
        textwidget.tag_config('nonterminal', foreground='blue', font=bold)

    def _item_repr(self, item):
        """
        Return the display form of the production C{item}, as a list
        of C{(text, tag)} pairs: the left-hand side followed by a tab,
        the arrow, and then one pair per right-hand side element.
        """
        def describe(elt):
            # Nonterminals are shown by symbol; anything else is shown
            # with repr() and tagged as a terminal.
            if isinstance(elt, Nonterminal):
                return (' %s' % elt.symbol(), 'nonterminal')
            return (' %r' % elt, 'terminal')

        pieces = [('%s\t' % item.lhs(), 'nonterminal'),
                  (self.ARROW, 'arrow')]
        pieces.extend([describe(elt) for elt in item.rhs()])
        return pieces

86 87 ###################################################################### 88 # CFG Editor 89 ###################################################################### 90 91 _CFGEditor_HELP = """ 92 93 The CFG Editor can be used to create or modify context free grammars. 94 A context free grammar consists of a start symbol and a list of 95 productions. The start symbol is specified by the text entry field in 96 the upper right hand corner of the editor; and the list of productions 97 are specified in the main text editing box. 98 99 Every non-blank line specifies a single production. Each production 100 has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS 101 is a list of nonterminals and terminals. 102 103 Nonterminals must be a single word, such as S or NP or NP_subj. 104 Currently, nonterminals must consists of alphanumeric characters and 105 underscores (_). Nonterminals are colored blue. If you place the 106 mouse over any nonterminal, then all occurances of that nonterminal 107 will be highlighted. 108 109 Termianals must be surrounded by single quotes (') or double 110 quotes(\"). For example, "dog" and "New York" are terminals. 111 Currently, the string within the quotes must consist of alphanumeric 112 characters, underscores, and spaces. 113 114 To enter a new production, go to a blank line, and type a nonterminal, 115 followed by an arrow (->), followed by a sequence of terminals and 116 nonterminals. Note that "->" (dash + greater-than) is automatically 117 converted to an arrow symbol. When you move your cursor to a 118 different line, your production will automatically be colorized. If 119 there are any errors, they will be highlighted in red. 120 121 Note that the order of the productions is signifigant for some 122 algorithms. To re-order the productions, use cut and paste to move 123 them. 124 125 Use the buttons at the bottom of the window when you are done editing 126 the CFG: 127 - Ok: apply the new CFG, and exit the editor. 
128 - Apply: apply the new CFG, and do not exit the editor. 129 - Reset: revert to the original CFG, and do not exit the editor. 130 - Cancel: revert to the original CFG, and exit the editor. 131 132 """ 133

class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    C{CFGEditor} places the following restrictions on what C{CFG}s can
    be edited:
        - All nonterminals must be strings consisting of word
          characters.
        - All terminals must be strings consisting of word characters
          and space characters.
    """
    # Regular expressions used by _analyze_line.  Precompile them, so
    # we can process the text faster.  All pattern fragments are raw
    # strings so that "\s" and "\w" reach the regexp engine untouched.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + r"))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)" +                         # LHS
        r"(->|(" + ARROW + r"))\s*" +             # arrow
        r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$")   # RHS
    _TOKEN_RE = re.compile(
        r"\w+|->|'[\w ]+'|\"[\w ]+\"|(" + ARROW + r")")
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Create the editor window as a C{Toplevel} child of C{parent}.

        @param parent: The Tk widget that owns the editor window.
        @param cfg: The grammar to edit.  If C{None}, an empty grammar
            with start symbol C{S} is used.
        @param set_cfg_callback: If not C{None}, a function that is
            called with a grammar whenever the grammar is applied or
            reset.
        """
        self._parent = parent
        if cfg is not None:
            self._cfg = cfg
        else:
            self._cfg = Grammar(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """
        Build the frame that holds the start symbol entry field,
        pre-filled with the symbol of the current grammar's start.
        """
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """
        Build the frame that holds the Ok, Apply, Reset, Cancel and
        Help buttons.
        """
        frame = self._buttonframe = Frame(self._top)
        Button(frame, text='Ok', command=self._ok,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Apply', command=self._apply,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Reset', command=self._reset,
               underline=0, takefocus=0,).pack(side='left')
        Button(frame, text='Cancel', command=self._cancel,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Help', command=self._help,
               underline=0, takefocus=0).pack(side='right')

    def _init_bindings(self):
        """
        Set the window title and bind the keyboard shortcuts for
        cancel, ok, apply, reset and help.
        """
        self._top.title('CFG Editor')

        # Control-x and Control-c are not bound to cancel (those
        # bindings were disabled in the original source).
        self._top.bind('<Control-q>', self._cancel)
        self._top.bind('<Alt-q>', self._cancel)
        self._top.bind('<Control-d>', self._cancel)
        self._top.bind('<Alt-x>', self._cancel)
        self._top.bind('<Escape>', self._cancel)
        self._top.bind('<Alt-c>', self._cancel)

        self._top.bind('<Control-o>', self._ok)
        self._top.bind('<Alt-o>', self._ok)
        self._top.bind('<Control-a>', self._apply)
        self._top.bind('<Alt-a>', self._apply)
        self._top.bind('<Control-r>', self._reset)
        self._top.bind('<Alt-r>', self._reset)
        self._top.bind('<Control-h>', self._help)
        self._top.bind('<Alt-h>', self._help)
        self._top.bind('<F1>', self._help)

    def _init_prodframe(self):
        """
        Build the frame that holds the production text widget and its
        scrollbar, install the colorization tags and event bindings,
        and fill the text widget with the current grammar's
        productions.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(self._prodframe, background='#e0e0e0',
                                exportselection=1)
        self._textscroll = Scrollbar(self._prodframe, takefocus=0,
                                     orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus.  Returning 'break' stops Tk from going on
        # to the Text class binding for <Tab>, which would otherwise
        # insert a tab character and keep the focus in this widget.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
            return 'break'
        self._textwidget.bind('<Tab>', cycle)

        # Group the productions as (lhs, [rhs, ...]) pairs, merging
        # each production into the preceding one when both have the
        # same LHS.  Empty right-hand sides are never merged.
        prod_tuples = [(p.lhs(), [p.rhs()])
                       for p in self._cfg.productions()]
        for i in range(len(prod_tuples)-1, 0, -1):
            if prod_tuples[i][0] == prod_tuples[i-1][0]:
                if () in prod_tuples[i][1]: continue
                if () in prod_tuples[i-1][1]: continue
                prod_tuples[i-1][1].extend(prod_tuples[i][1])
                del prod_tuples[i]

        # Write one line per group: "LHS -> alt1 | alt2 | ...".
        for lhs, rhss in prod_tuples:
            alternatives = []
            for rhs in rhss:
                alt = ''
                for elt in rhs:
                    if isinstance(elt, Nonterminal): alt += ' %s' % elt
                    else: alt += ' %r' % elt
                alternatives.append(alt)
            line = '%s ->' % lhs + ' |'.join(alternatives) + '\n'
            self._textwidget.insert('end', line)

        self._analyze()

    def _clear_tags(self, linenum):
        """
        Remove all tags (except C{arrow} and C{sel}) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any C{'->'} text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # Pass 1: swap each "->" for a tab followed by the arrow
        # symbol (the tab is inserted at the same index afterwards, so
        # it ends up in front of the arrow).
        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '': break
            self._textwidget.delete(arrow, arrow+'+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Pass 2: make sure every arrow symbol carries the 'arrow' tag.
        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
                                            'end+1char')
            if arrow == '': break
            self._textwidget.tag_add('arrow', arrow, arrow+'+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        # What type of token is it?
        if match.group()[0] in "'\"":
            tag = 'terminal'
        elif match.group() in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_'+match.group()
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the text tag C{tag} for a nonterminal (bold, with
        the given foreground color).  Unless matching-nonterminal
        highlighting is disabled, also bind mouse enter/leave events
        that switch the tag's background on and off.
        """
        self._textwidget.tag_config(tag, foreground=foreground,
                                    font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')

        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        # If it's a valid production, then colorize each token.
        if CFGEditor._PRODUCTION_RE.match(line):
            # It's valid; use _TOKEN_RE to tokenize the production,
            # and call _analyze_token on each token.
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace C{->} with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines+1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.
        """
        productions = []

        # Get the text, normalize it, and split it into lines.
        # NOTE: re.sub (rather than str.replace) is kept on purpose;
        # under Python 2 the widget may return unicode text while
        # ARROW is a byte string.
        text = self._textwidget.get('1.0', 'end')
        text = re.sub(self.ARROW, '->', text)
        text = re.sub('\t', ' ', text)
        lines = text.split('\n')

        # Convert each non-blank line to CFG productions.
        for line in lines:
            line = line.strip()
            if line == '': continue
            productions += parse_cfg_production(line)

        return productions

    def _destroy(self, *e):
        """Destroy the editor window, if it has not been destroyed."""
        if self._top is None: return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a grammar from the start symbol field and the text
        buffer, and pass it to the callback (if there is one).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = Grammar(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Replace the text buffer with the productions of the original
        grammar, and pass the original grammar to the callback (if
        there is one).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """
        Revert to the original grammar (best effort), then close the
        editor.
        """
        # The reset is best-effort: the window is closed even if it
        # fails.  Only ordinary errors are suppressed, so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            self._reset()
        except Exception:
            pass
        self._destroy()

    def _help(self, *e):
        """Show the editor's help text in a separate window."""
        # The default font's not very legible; try using 'fixed'
        # instead, and fall back to the default if that fails.
        try:
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75, font='fixed')
        except Exception:
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75)

508 509 ###################################################################### 510 # New Demo (built tree based on cfg) 511 ###################################################################### 512

class CFGDemo(object):
    """
    A demo window that shows a grammar's productions in a list, the
    treelet for the selected production in a canvas, and a workspace
    containing a tree made of the start symbol and the words of a
    text.
    """

    def __init__(self, grammar, text):
        """
        Create the demo's main window.

        @param grammar: The grammar whose productions are listed.
        @param text: The sequence of words shown as the leaves of the
            workspace tree.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    #//////////////////////////////////////////////////
    # Initialization
    #//////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q on C{top} to close the demo."""
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        """Placeholder; no menu bar is created."""
        pass

    def _init_buttons(self, parent):
        """Placeholder; no buttons are created."""
        pass

    def _init_grammar(self, parent):
        """
        Create the production list, give it the focus, and register
        L{_selectprod_cb} for its C{select} and C{move} events.
        """
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the (initially empty) canvas for the treelet."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas frame and fill it."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    #//////////////////////////////////////////////////
    # Workspace
    #//////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the workspace tree with a fresh one whose root is the
        grammar's start symbol and whose leaves are the words of the
        text.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = []
        for word in self._text:
            # NOTE(review): `Token` is not defined in this module; it
            # is presumably supplied by one of the star imports --
            # confirm it still exists.
            if isinstance(word, Token): word = word.type()
            leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))

        # Put it all together into one tree.  The connecting lines
        # are drawn in white.
        self._tree = TreeSegmentWidget(c, rootnode, leaves,
                                       color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves: leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder; does nothing."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Look for positions among the subtrees of C{tree} where the
        right-hand side of C{prod} matches a run of consecutive
        subtrees, and print the index of each match.

        @param prod: The production whose right-hand side is matched.
        @param tree: The tree segment widget to search; defaults to
            the workspace tree.
        """
        if tree is None: tree = self._tree
        subtrees = tree.subtrees()
        rhs = prod.rhs()

        # Slide a window of len(rhs) over the subtrees.  The "+ 1"
        # makes sure the window ending at the last subtree is tested.
        for i in range(len(subtrees) - len(rhs) + 1):
            for j, node in enumerate(rhs):
                widget = subtrees[i+j]
                if (isinstance(node, Nonterminal) and
                    isinstance(widget, TreeSegmentWidget) and
                    node.symbol() == widget.node().text()):
                    pass  # matching nonterminal
                elif (isinstance(node, (str, unicode)) and
                      isinstance(widget, TextWidget) and
                      node == widget.text()):
                    pass  # matching terminal
                else: break
            else:
                # Everything matched!
                print('MATCH AT %d' % i)

    #//////////////////////////////////////////////////
    # Grammar
    #//////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight C{production},
        draw its treelet in the treelet canvas, and look for places
        in the workspace where it matches.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None: self._treelet.destroy()

        # Convert the production to a tree.
        from nltk import Tree
        rhs = production.rhs()
        # NOTE(review): rebinding `elt` does not modify `rhs`, so the
        # tree built below receives the original rhs elements --
        # confirm the intended conversion against the Tree API.
        for (i, elt) in enumerate(rhs):
            if isinstance(elt, Nonterminal): elt = Tree(elt)
        tree = Tree(production.lhs().symbol(), *rhs)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))
        self._treelet = tree_to_treesegment(canvas, tree,
                                            node_font=node_font,
                                            leaf_font=leaf_font)
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Destroy the demo's main window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop of the demo's main window."""
        self._top.mainloop(*args, **kwargs)

667

def demo2():
    """
    Build a small example grammar and open a L{CFGDemo} window for it
    with the sentence "I saw a man in the park".
    """
    from nltk import cfg

    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [
        cfg.Nonterminal(name) for name in symbol_names.split()]

    # Syntactic productions.
    syntactic = [
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),
        cfg.Production(PP, []),
        cfg.Production(PP, ['up', 'over', NP]),
    ]

    # Lexical productions, as (lhs, word) pairs.
    lexicon = [
        (NP, 'I'), (Det, 'the'),
        (Det, 'a'), (N, 'man'),
        (V, 'saw'), (P, 'in'),
        (P, 'with'), (N, 'park'),
        (N, 'dog'), (N, 'statue'),
        (Det, 'my'),
    ]
    lexical = [cfg.Production(lhs, [word]) for (lhs, word) in lexicon]

    grammar = cfg.Grammar(S, tuple(syntactic + lexical))

    words = 'I saw a man in the park'.split()
    window = CFGDemo(grammar, words)
    window.mainloop()

699 700 ###################################################################### 701 # Old Demo 702 ###################################################################### 703

def demo():
    """
    Open a L{CFGEditor} on a small example grammar.  Every grammar
    that the editor passes to its callback is printed.
    """
    from nltk import cfg

    grammar = cfg.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(grammar)

    top = Tk()
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()

738

def demo3():
    """
    Show a L{ProductionList} for a small set of example productions,
    with two of them marked.  Pressing C{q} closes the window.
    """
    from nltk import cfg
    (S, VP, NP, PP, P, N, Name, V, Det) = \
        nonterminals('S, VP, NP, PP, P, N, Name, V, Det')

    productions = (
        # Syntactic Productions
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),
        cfg.Production(PP, []),

        cfg.Production(PP, ['up', 'over', NP]),

        # Lexical Productions
        cfg.Production(NP, ['I']), cfg.Production(Det, ['the']),
        cfg.Production(Det, ['a']), cfg.Production(N, ['man']),
        cfg.Production(V, ['saw']), cfg.Production(P, ['in']),
        cfg.Production(P, ['with']), cfg.Production(N, ['park']),
        cfg.Production(N, ['dog']), cfg.Production(N, ['statue']),
        cfg.Production(Det, ['my']),
        )

    t = Tk()
    def destroy(e, t=t): t.destroy()
    t.bind('q', destroy)
    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

    # Enter the event loop, as demo() and demo2() do; without it the
    # window is never serviced when this is run as a script.
    t.mainloop()


if __name__ == '__main__': demo()

Source Code for Module nltk.draw.cfg