Code Coverage for nltk.draw.cfg
Untested Functions
- demo(), demo2(), demo3()
- CFGDemo: __init__(), _init_bindings(), _init_grammar(), _init_treelet(), _init_workspace(), _markproduction(), _selectprod_cb(), destroy(), mainloop(), reset_workspace(), workspace_markprod()
- CFGEditor: __init__(), _analyze(), _analyze_line(), _analyze_token(), _apply(), _cancel(), _check_analyze(), _clear_tags(), _destroy(), _help(), _init_bindings(), _init_buttons(), _init_nonterminal_tag(), _init_prodframe(), _init_startframe(), _mark_error(), _ok(), _parse_productions(), _replace_arrows(), _reset()
- ProductionList: _init_colortags(), _item_repr()
"""
Visualization tools for CFGs.
"""
import re
"""
Idea for a nice demo:
  - 3 panes: grammar, treelet, working area
    - grammar is a list of productions
    - when you select a production, the treelet that it licenses appears
      in the treelet area
    - the working area has the text on the bottom, and S at top. When
      you select a production, it shows (ghosted) the locations where
      that production's treelet could be attached to either the text
      or the tree rooted at S.
  - the user can drag the treelet onto one of those (or click on them?)
  - the user can delete pieces of the tree from the working area
    (right click?)
  - connecting top to bottom? drag one NP onto another?
+-------------------------------------------------------------+
| S -> NP VP   |                      S                       |
|[NP -> Det N ]|                     / \                      |
|     ...      |                   NP   VP                    |
| N -> 'dog'   |                                              |
| N -> 'cat'   |                                              |
|     ...      |                                              |
+--------------+                                              |
|      NP      |                       Det     N              |
|     /  \     |                        |      |              |
|   Det   N    |  the    cat    saw    the    dog             |
|              |                                              |
+--------------+----------------------------------------------+
Operations:
  - connect a new treelet -- drag or click shadow
  - delete a treelet -- right click
    - if only connected to top, delete everything below
    - if only connected to bottom, delete everything above
  - connect top & bottom -- drag a leaf to a root or a root to a leaf
  - disconnect top & bottom -- right click
    - if connected to top & bottom, then disconnect
"""
from nltk.draw import *
from nltk.cfg import *
from Tkinter import *
from nltk.tree import *
from nltk.draw.tree import *
class ProductionList(ColorizedList):
    """
    A C{ColorizedList} whose items are grammar productions.  Each
    production is displayed as its left-hand side, an arrow symbol, and
    the elements of its right-hand side, with separate text tags for
    nonterminals, the arrow, and terminals.
    """
    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """
        Configure the C{terminal}, C{arrow} and C{nonterminal} tags on
        C{textwidget}.  C{options} is accepted but not used here.
        """
        tag_styles = (
            ('terminal', dict(foreground='#006000')),
            ('arrow', dict(font='symbol', underline='0')),
            ('nonterminal', dict(foreground='blue',
                                 font=('helvetica', -12, 'bold'))),
        )
        for tagname, style in tag_styles:
            textwidget.tag_config(tagname, **style)

    def _item_repr(self, item):
        """
        @return: A list of C{(text, tag)} pairs describing how the
            production C{item} should be displayed: the left-hand side
            followed by a tab, the arrow, and then one entry for each
            element of the right-hand side.
        """
        pieces = [('%s\t' % item.lhs(), 'nonterminal'),
                  (self.ARROW, 'arrow')]
        for symbol in item.rhs():
            if not isinstance(symbol, Nonterminal):
                # Terminals are shown using their repr (i.e., quoted).
                pieces.append((' %r' % symbol, 'terminal'))
            else:
                pieces.append((' %s' % symbol.symbol(), 'nonterminal'))
        return pieces
# Help text displayed by CFGEditor._help().  It is stripped of leading
# and trailing whitespace before being shown.
_CFGEditor_HELP = """
The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
is specified in the main text editing box.
Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS", where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.
Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consist of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.
Terminals must be surrounded by single quotes (') or double
quotes (\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.
To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.
Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.
Use the buttons at the bottom of the window when you are done editing
the CFG:
- Ok: apply the new CFG, and exit the editor.
- Apply: apply the new CFG, and do not exit the editor.
- Reset: revert to the original CFG, and do not exit the editor.
- Cancel: revert to the original CFG, and exit the editor.
"""
class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    C{CFGEditor} places the following restrictions on what C{CFG}s can
    be edited:
      - All nonterminals must be strings consisting of word
        characters.
      - All terminals must be strings consisting of word characters
        and space characters.
    """
    # The arrow character that is displayed in place of '->'.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    # Regular expressions used to validate and tokenize a single line
    # of the production text.  Both '->' and ARROW are accepted as the
    # arrow.
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|("+ARROW+"))")
    _ARROW_RE = re.compile(r"\s*(->|("+ARROW+r"))\s*")
    _PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" +
                                r"(->|("+ARROW+r"))\s*" +
                                r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$")
    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|("+ARROW+")")

    # Font used for nonterminals.
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Create a new editor window as a C{Toplevel} of C{parent}.

        @param parent: The Tk widget that owns the editor window.
        @param cfg: The grammar to edit.  If C{None}, then an empty
            grammar with start symbol C{S} is used.
        @param set_cfg_callback: If not C{None}, a function that is
            called with a grammar whenever the user applies their
            changes or resets to the original grammar.
        """
        self._parent = parent
        if cfg is not None:
            self._cfg = cfg
        else:
            self._cfg = Grammar(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window and its three areas.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """
        Create the frame holding the start symbol entry field, and
        fill the entry with the start symbol of the current grammar.
        """
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """
        Create the frame holding the Ok/Apply/Reset/Cancel/Help buttons.
        """
        frame = self._buttonframe = Frame(self._top)
        buttons = (
            ('Ok', self._ok, 'left'),
            ('Apply', self._apply, 'left'),
            ('Reset', self._reset, 'left'),
            ('Cancel', self._cancel, 'left'),
            ('Help', self._help, 'right'),
        )
        for label, command, side in buttons:
            Button(frame, text=label, command=command,
                   underline=0, takefocus=0).pack(side=side)

    def _init_bindings(self):
        """
        Set the window title and bind the keyboard shortcuts for the
        cancel, ok, apply, reset and help actions.
        """
        self._top.title('CFG Editor')
        shortcuts = (
            (self._cancel, ('<Control-q>', '<Alt-q>', '<Control-d>',
                            '<Alt-x>', '<Escape>', '<Alt-c>')),
            (self._ok, ('<Control-o>', '<Alt-o>')),
            (self._apply, ('<Control-a>', '<Alt-a>')),
            (self._reset, ('<Control-r>', '<Alt-r>')),
            (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
        )
        for callback, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, callback)

    def _init_prodframe(self):
        """
        Create the frame holding the production text widget and its
        scrollbar, set up the text tags and event bindings, and fill
        the text widget with the productions of the current grammar.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(self._prodframe, background='#e0e0e0',
                                exportselection=1)
        self._textscroll = Scrollbar(self._prodframe, takefocus=0,
                                     orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # The line that the insertion cursor was on when we last
        # checked; used by _check_analyze to detect line changes.
        self._linenum = 0

        # Re-process the text whenever it may have changed.
        self._top.bind('>', self._replace_arrows)
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Make Tab move the focus to the next widget.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
        self._textwidget.bind('<Tab>', cycle)

        # Merge adjacent productions that share a left-hand side, so
        # that they can be displayed on one line, separated by '|'.
        # Productions with an empty right-hand side are never merged.
        prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
        for i in range(len(prod_tuples)-1, 0, -1):
            if prod_tuples[i][0] == prod_tuples[i-1][0]:
                if () in prod_tuples[i][1]: continue
                if () in prod_tuples[i-1][1]: continue
                prod_tuples[i-1][1].extend(prod_tuples[i][1])
                del prod_tuples[i]

        # Write one line of text for each (merged) production.
        for lhs, rhss in prod_tuples:
            alternatives = []
            for rhs in rhss:
                alternative = ''
                for elt in rhs:
                    if isinstance(elt, Nonterminal):
                        alternative += ' %s' % elt
                    else:
                        alternative += ' %r' % elt
                alternatives.append(alternative)
            line = '%s ->' % lhs + ' |'.join(alternatives) + '\n'
            self._textwidget.insert('end', line)

        self._analyze()

    def _clear_tags(self, linenum):
        """
        Remove all tags (except C{arrow} and C{sel}) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any C{'->'} text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # Replace each '->' with a tab followed by a tagged arrow.
        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '': break
            self._textwidget.delete(arrow, arrow+'+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Make sure that every arrow character carries the arrow tag.
        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
                                            'end+1char')
            if arrow == '': break
            self._textwidget.tag_add('arrow', arrow, arrow+'+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        # What type of token is it?
        if match.group()[0] in "'\"":
            tag = 'terminal'
        elif match.group() in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # Each nonterminal gets its own tag, which is created (and
            # given its bindings) the first time it is seen.
            tag = 'nonterminal_'+match.group()
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the text tag C{tag} for a nonterminal.  Unless
        highlighting of matching nonterminals is disabled, also bind
        mouse enter/leave events that set and clear the background
        color of everything carrying the tag.
        """
        self._textwidget.tag_config(tag, foreground=foreground,
                                    font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return
        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')
        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')
        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        # If it's a valid production, then colorize each token;
        # otherwise, if it isn't blank, mark the error.
        if CFGEditor._PRODUCTION_RE.match(line):
            # The substitution is only used to visit each token; its
            # result is discarded.
            def analyze_token(match, self=self, linenum=linenum):
                self._analyze_token(match, linenum)
                return ''
            CFGEditor._TOKEN_RE.sub(analyze_token, line)
        elif line.strip() != '':
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace C{->} with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines+1):
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.
        """
        productions = []

        # Get the text, normalize it, and split it into lines.  The
        # arrow and tab are literal strings, not patterns, so plain
        # string replacement is used rather than a regexp.
        text = self._textwidget.get('1.0', 'end')
        text = text.replace(self.ARROW, '->')
        text = text.replace('\t', ' ')
        lines = text.split('\n')

        # Convert each non-blank line to productions.
        for line in lines:
            line = line.strip()
            if line == '': continue
            productions += parse_cfg_production(line)

        return productions

    def _destroy(self, *e):
        """
        Destroy the editor window, if it has not been destroyed yet.
        """
        if self._top is None: return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """
        Apply the edited grammar, and close the editor.
        """
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a grammar from the start symbol entry and the production
        text, and pass it to the callback (if one was given).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = Grammar(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Replace the production text with the productions of the
        original grammar (one per line), and pass the original grammar
        to the callback (if one was given).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """
        Revert to the original grammar, and close the editor.
        """
        # Reverting is best-effort: the window is closed even if it
        # fails.  KeyboardInterrupt and SystemExit are not swallowed.
        try:
            self._reset()
        except Exception:
            pass
        self._destroy()

    def _help(self, *e):
        """
        Show the help text for the editor in a new window.
        """
        # First try a fixed-width font; if that fails, fall back to
        # the default font.
        try:
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75, font='fixed')
        except Exception:
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75)
class CFGDemo(object):
    """
    A demo window with three areas: a list of the productions of a
    grammar, a canvas showing the treelet for the selected production,
    and a workspace containing the start symbol and the words of a
    text.
    """
    def __init__(self, grammar, text):
        """
        @param grammar: The grammar whose productions are listed.
        @param text: A sequence of words to show in the workspace.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size.
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    #//////////////////////////////////////////////////
    # Initialization
    #//////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q to close the window."""
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        """Placeholder; no menu bar is created."""
        pass

    def _init_buttons(self, parent):
        """Placeholder; no buttons are created."""
        pass

    def _init_grammar(self, parent):
        """
        Create the production list, and register the callback that is
        run when a production is selected or moved to.
        """
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the (initially empty) canvas that shows the treelet."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas, and fill it in."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    #//////////////////////////////////////////////////
    # Workspace
    #//////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the contents of the workspace with a tree segment whose
        root is the grammar's start symbol and whose children are the
        words of the text.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = []
        for word in self._text:
            # NOTE(review): Token is not defined in this file; it is
            # presumably supplied by one of the star imports -- confirm.
            if isinstance(word, Token): word = word.type()
            leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))

        # Put it all together into one tree.  The lines are drawn in
        # white (the same as the canvas background).
        self._tree = TreeSegmentWidget(c, rootnode, leaves,
                                       color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves: leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder; does nothing."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Search the children of C{tree} for contiguous runs that match
        the right-hand side of C{prod}, and print the index of each
        match.

        @param prod: The production to look for.
        @param tree: The tree segment widget to search.  If C{None},
            the workspace tree is searched.
        """
        if tree is None: tree = self._tree
        subtrees = tree.subtrees()
        rhs = prod.rhs()

        # Try every alignment of the right-hand side against the
        # children, including the one that ends at the last child.
        for i in range(len(subtrees) - len(rhs) + 1):
            for j, node in enumerate(rhs):
                widget = subtrees[i+j]
                if (isinstance(node, Nonterminal) and
                    isinstance(widget, TreeSegmentWidget) and
                    node.symbol() == widget.node().text()):
                    pass  # matching nonterminal
                elif (isinstance(node, (str, unicode)) and
                      isinstance(widget, TextWidget) and
                      node == widget.text()):
                    pass  # matching terminal
                else:
                    break
            else:
                # Everything matched!
                print('MATCH AT %d' % i)

    #//////////////////////////////////////////////////
    # Grammar
    #//////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight C{production},
        show its treelet in the treelet canvas, and look for places
        in the workspace where it matches.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None: self._treelet.destroy()

        # Convert the production to a tree.
        from nltk import Tree
        rhs = production.rhs()
        # NOTE(review): this loop only rebinds the loop variable, so
        # rhs itself is left unchanged.  It looks like the intent was
        # to wrap each nonterminal in a Tree -- confirm against the
        # Tree API before changing it.
        for (i, elt) in enumerate(rhs):
            if isinstance(elt, Nonterminal): elt = Tree(elt)
        tree = Tree(production.lhs().symbol(), *rhs)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))
        self._treelet = tree_to_treesegment(canvas, tree,
                                            node_font=node_font,
                                            leaf_font=leaf_font)
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Destroy the demo window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Run the Tk main loop of the demo window."""
        self._top.mainloop(*args, **kwargs)
def demo2():
    """
    Build a small sample grammar and sentence, and run a C{CFGDemo}
    window on them until it is closed.
    """
    from nltk import cfg
    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(name)
                                           for name in symbol_names.split()]
    rule = cfg.Production
    productions = (
        # Syntactic productions
        rule(S, [NP, VP]),
        rule(NP, [Det, N]),
        rule(NP, [NP, PP]),
        rule(VP, [VP, PP]),
        rule(VP, [V, NP, PP]),
        rule(VP, [V, NP]),
        rule(PP, [P, NP]),
        rule(PP, []),
        rule(PP, ['up', 'over', NP]),
        # Lexical productions
        rule(NP, ['I']),
        rule(Det, ['the']),
        rule(Det, ['a']),
        rule(N, ['man']),
        rule(V, ['saw']),
        rule(P, ['in']),
        rule(P, ['with']),
        rule(N, ['park']),
        rule(N, ['dog']),
        rule(N, ['statue']),
        rule(Det, ['my']),
    )
    grammar = cfg.Grammar(S, productions)
    words = 'I saw a man in the park'.split()
    window = CFGDemo(grammar, words)
    window.mainloop()
def demo():
    """
    Open a C{CFGEditor} on a small sample grammar.  Each grammar that
    the editor passes to its callback is printed.
    """
    from nltk import cfg
    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(name)
                                           for name in symbol_names.split()]
    grammar = cfg.parse_cfg("""
S -> NP VP
PP -> P NP
NP -> Det N
NP -> NP PP
VP -> V NP
VP -> VP PP
Det -> 'a'
Det -> 'the'
Det -> 'my'
NP -> 'I'
N -> 'dog'
N -> 'man'
N -> 'park'
N -> 'statue'
V -> 'saw'
P -> 'in'
P -> 'up'
P -> 'over'
P -> 'with'
""")

    def show_grammar(grammar):
        # Callback handed to the editor.
        print(grammar)

    root = Tk()
    editor = CFGEditor(root, grammar, show_grammar)
    Label(root, text='\nTesting CFG Editor\n').pack()
    Button(root, text='Quit', command=root.destroy).pack()
    root.mainloop()
def demo3():
    """
    Create a C{ProductionList} showing a small set of sample
    productions, with two of them marked.  Pressing C{q} destroys the
    window.
    """
    # NOTE(review): unlike demo() and demo2(), this function returns
    # without entering the Tk main loop -- confirm that is intended.
    from nltk import cfg
    (S, VP, NP, PP, P, N, Name, V, Det) = \
        nonterminals('S, VP, NP, PP, P, N, Name, V, Det')
    rule = cfg.Production
    productions = (
        # Syntactic productions
        rule(S, [NP, VP]),
        rule(NP, [Det, N]),
        rule(NP, [NP, PP]),
        rule(VP, [VP, PP]),
        rule(VP, [V, NP, PP]),
        rule(VP, [V, NP]),
        rule(PP, [P, NP]),
        rule(PP, []),
        rule(PP, ['up', 'over', NP]),
        # Lexical productions
        rule(NP, ['I']),
        rule(Det, ['the']),
        rule(Det, ['a']),
        rule(N, ['man']),
        rule(V, ['saw']),
        rule(P, ['in']),
        rule(P, ['with']),
        rule(N, ['park']),
        rule(N, ['dog']),
        rule(N, ['statue']),
        rule(Det, ['my']),
    )

    root = Tk()

    def close_window(event, window=root):
        window.destroy()
    root.bind('q', close_window)

    prodlist = ProductionList(root, productions)
    prodlist.pack(expand=1, fill='both')
    for event_name in ('select', 'move'):
        prodlist.add_callback(event_name, prodlist.markonly)
    prodlist.focus()
    for index in (2, 8):
        prodlist.mark(productions[index])
# Run the CFG editor demo when this module is executed as a script.
if __name__ == '__main__':
    demo()