>>> from nltk.grammar import DependencyGrammar >>> from nltk.parse import ( ... DependencyGraph, ... ProjectiveDependencyParser, ... NonprojectiveDependencyParser, ... )
>>> treebank_data = """Pierre NNP 2 NMOD ... Vinken NNP 8 SUB ... , , 2 P ... 61 CD 5 NMOD ... years NNS 6 AMOD ... old JJ 2 NMOD ... , , 2 P ... will MD 0 ROOT ... join VB 8 VC ... the DT 11 NMOD ... board NN 9 OBJ ... as IN 9 VMOD ... a DT 15 NMOD ... nonexecutive JJ 15 NMOD ... director NN 12 PMOD ... Nov. NNP 9 VMOD ... 29 CD 16 NMOD ... . . 9 VMOD ... """>>> dg = DependencyGraph(treebank_data) >>> dg.tree().pprint() (will (Vinken Pierre , (old (years 61)) ,) (join (board the) (as (director a nonexecutive)) (Nov. 29) .)) >>> for head, rel, dep in dg.triples(): ... print( ... '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})' ... .format(h=head, r=rel, d=dep) ... ) (will, MD), SUB, (Vinken, NNP) (Vinken, NNP), NMOD, (Pierre, NNP) (Vinken, NNP), P, (,, ,) (Vinken, NNP), NMOD, (old, JJ) (old, JJ), AMOD, (years, NNS) (years, NNS), NMOD, (61, CD) (Vinken, NNP), P, (,, ,) (will, MD), VC, (join, VB) (join, VB), OBJ, (board, NN) (board, NN), NMOD, (the, DT) (join, VB), VMOD, (as, IN) (as, IN), PMOD, (director, NN) (director, NN), NMOD, (a, DT) (director, NN), NMOD, (nonexecutive, JJ) (join, VB), VMOD, (Nov., NNP) (Nov., NNP), NMOD, (29, CD) (join, VB), VMOD, (., .)
Using the dependency-parsed version of the Penn Treebank corpus sample.
>>> from nltk.corpus import dependency_treebank >>> t = dependency_treebank.parsed_sents()[0] >>> print(t.to_conll(3)) # doctest: +NORMALIZE_WHITESPACE Pierre NNP 2 Vinken NNP 8 , , 2 61 CD 5 years NNS 6 old JJ 2 , , 2 will MD 0 join VB 8 the DT 11 board NN 9 as IN 9 a DT 15 nonexecutive JJ 15 director NN 12 Nov. NNP 9 29 CD 16 . . 8
Using the output of zpar (like Malt-TAB but with zero-based indexing)
>>> zpar_data = """ ... Pierre NNP 1 NMOD ... Vinken NNP 7 SUB ... , , 1 P ... 61 CD 4 NMOD ... years NNS 5 AMOD ... old JJ 1 NMOD ... , , 1 P ... will MD -1 ROOT ... join VB 7 VC ... the DT 10 NMOD ... board NN 8 OBJ ... as IN 8 VMOD ... a DT 14 NMOD ... nonexecutive JJ 14 NMOD ... director NN 11 PMOD ... Nov. NNP 8 VMOD ... 29 CD 15 NMOD ... . . 7 P ... """>>> zdg = DependencyGraph(zpar_data, zero_based=True) >>> print(zdg.tree()) (will (Vinken Pierre , (old (years 61)) ,) (join (board the) (as (director a nonexecutive)) (Nov. 29)) .)
>>> grammar = DependencyGrammar.fromstring(""" ... 'fell' -> 'price' | 'stock' ... 'price' -> 'of' 'the' ... 'of' -> 'stock' ... 'stock' -> 'the' ... """) >>> print(grammar) Dependency grammar with 5 productions 'fell' -> 'price' 'fell' -> 'stock' 'price' -> 'of' 'the' 'of' -> 'stock' 'stock' -> 'the'>>> dp = ProjectiveDependencyParser(grammar) >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])): ... print(t) (fell (price the (of (stock the)))) (fell (price the of) (stock the)) (fell (price the of the) stock)
>>> grammar = DependencyGrammar.fromstring(""" ... 'taught' -> 'play' | 'man' ... 'man' -> 'the' ... 'play' -> 'golf' | 'dog' | 'to' ... 'dog' -> 'his' ... """) >>> print(grammar) Dependency grammar with 7 productions 'taught' -> 'play' 'taught' -> 'man' 'man' -> 'the' 'play' -> 'golf' 'play' -> 'dog' 'play' -> 'to' 'dog' -> 'his'>>> dp = NonprojectiveDependencyParser(grammar) >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])>>> print(g.root['word']) taught>>> for _, node in sorted(g.nodes.items()): ... if node['word'] is not None: ... print('{address} {word}: {d}'.format(d=node['deps'][''], **node)) 1 the: [] 2 man: [1] 3 taught: [2, 7] 4 his: [] 5 dog: [4] 6 to: [] 7 play: [5, 6, 8] 8 golf: []>>> print(g.tree()) (taught (man the) (play (dog his) to golf))