>>> from nltk.ccg import chart, lexicon

Construct a lexicon:

>>> lex = lexicon.parseLexicon('''
...     :- S, NP, N, VP
...
...     Det :: NP/N
...     Pro :: NP
...     Modal :: S\\NP/VP
...
...     TV :: VP/NP
...     DTV :: TV/NP
...
...     the => Det
...
...     that => Det
...     that => NP
...
...     I => Pro
...     you => Pro
...     we => Pro
...
...     chef => N
...     cake => N
...     children => N
...     dough => N
...
...     will => Modal
...     should => Modal
...     might => Modal
...     must => Modal
...
...     and => var\\.,var/.,var
...
...     to => VP[to]/VP
...
...     without => (VP\\VP)/VP[ing]
...
...     be => TV
...     cook => TV
...     eat => TV
...
...     cooking => VP[ing]/NP
...
...     give => DTV
...
...     is => (S\\NP)/NP
...     prefer => (S\\NP)/NP
...
...     which => (N\\N)/(S/NP)
...
...     persuade => (VP/VP[to])/NP
...     ''')

>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
>>> for parse in parser.parse("you prefer that cake".split()):  # doctest: +SKIP
...     chart.printCCGDerivation(parse)
...     break
...
 you    prefer      that   cake
 NP   ((S\NP)/NP)  (NP/N)   N
                  -------------->
                        NP
     --------------------------->
               (S\NP)
--------------------------------<
               S

>>> for parse in parser.parse("that is the cake which you prefer".split()):  # doctest: +SKIP
...     chart.printCCGDerivation(parse)
...     break
...
 that      is        the    cake      which       you    prefer
  NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
                                                 ----->T
                                              (S/(S\NP))
                                                 ------------------>B
                                                       (S/NP)
                                 ---------------------------------->
                                               (N\N)
                           ----------------------------------------<
                                              N
                   ------------------------------------------------>
                                          NP
      ------------------------------------------------------------->
                                 (S\NP)
-------------------------------------------------------------------<
                                 S

Some other sentences to try:
"that is the cake which we will persuade the chef to cook"
"that is the cake which we will persuade the chef to give the children"
>>> sent = "that is the dough which you will eat without cooking".split()
>>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)

Without Substitution (no output)

>>> for parse in nosub_parser.parse(sent):
...     chart.printCCGDerivation(parse)

With Substitution:
>>> for parse in parser.parse(sent): # doctest: +SKIP
... chart.printCCGDerivation(parse)
... break
...
that is the dough which you will eat without cooking
NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
----->T
(S/(S\NP))
------------------------------------->B
((VP\VP)/NP)
----------------------------------------------<Sx
(VP/NP)
----------------------------------------------------------->B
((S\NP)/NP)
---------------------------------------------------------------->B
(S/NP)
-------------------------------------------------------------------------------->
(N\N)
---------------------------------------------------------------------------------------<
N
----------------------------------------------------------------------------------------------->
NP
------------------------------------------------------------------------------------------------------------>
(S\NP)
------------------------------------------------------------------------------------------------------------------<
S
>>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
>>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
>>> from nltk.ccg import lexicon

Lexicons for the tests:

>>> test1_lex = '''
...     :- S,N,NP,VP
...     I => NP
...     you => NP
...     will => S\\NP/VP
...     cook => VP/NP
...     which => (N\\N)/(S/NP)
...     and => var\\.,var/.,var
...     might => S\\NP/VP
...     eat => VP/NP
...     the => NP/N
...     mushrooms => N
...     parsnips => N'''
>>> test2_lex = '''
...     :- N, S, NP, VP
...     articles => N
...     the => NP/N
...     and => var\\.,var/.,var
...     which => (N\\N)/(S/NP)
...     I => NP
...     anyone => NP
...     will => (S/VP)\\NP
...     file => VP/NP
...     without => (VP\\VP)/VP[ing]
...     forget => VP/NP
...     reading => VP[ing]/NP
...     '''

Tests handling of conjunctions. Note that while the two derivations are different, they are semantically equivalent.
>>> lex = lexicon.parseLexicon(test1_lex)
>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
>>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP
I will cook and might eat the mushrooms and parsnips
NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N
---------------------->B
((S\NP)/NP)
---------------------->B
((S\NP)/NP)
------------------------------------------------->
(((S\NP)/NP)\.,((S\NP)/NP))
-----------------------------------------------------------------------<
((S\NP)/NP)
------------------------------------->
(N\.,N)
------------------------------------------------<
N
-------------------------------------------------------->
NP
------------------------------------------------------------------------------------------------------------------------------->
(S\NP)
-----------------------------------------------------------------------------------------------------------------------------------<
S
I will cook and might eat the mushrooms and parsnips
NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N
---------------------->B
((S\NP)/NP)
---------------------->B
((S\NP)/NP)
------------------------------------------------->
(((S\NP)/NP)\.,((S\NP)/NP))
-----------------------------------------------------------------------<
((S\NP)/NP)
------------------------------------------------------------------------------->B
((S\NP)/N)
------------------------------------->
(N\.,N)
------------------------------------------------<
N
------------------------------------------------------------------------------------------------------------------------------->
(S\NP)
-----------------------------------------------------------------------------------------------------------------------------------<
S
Tests handling of subject extraction. It is interesting to note that the two parses are clearly semantically different.
>>> lex = lexicon.parseLexicon(test2_lex)
>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
>>> for parse in parser.parse("articles which I will file and forget without reading".split()):
... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP
articles which I will file and forget without reading
N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
-----------------<
(S/VP)
------------------------------------->B
((VP\VP)/NP)
----------------------------------------------<Sx
(VP/NP)
------------------------------------------------------------------------->
((VP/NP)\.,(VP/NP))
----------------------------------------------------------------------------------<
(VP/NP)
--------------------------------------------------------------------------------------------------->B
(S/NP)
------------------------------------------------------------------------------------------------------------------->
(N\N)
-----------------------------------------------------------------------------------------------------------------------------<
N
articles which I will file and forget without reading
N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP)
-----------------<
(S/VP)
------------------------------------>
((VP/NP)\.,(VP/NP))
---------------------------------------------<
(VP/NP)
------------------------------------->B
((VP\VP)/NP)
----------------------------------------------------------------------------------<Sx
(VP/NP)
--------------------------------------------------------------------------------------------------->B
(S/NP)
------------------------------------------------------------------------------------------------------------------->
(N\N)
-----------------------------------------------------------------------------------------------------------------------------<
N
Unicode words are supported.

>>> from nltk.ccg import chart, lexicon

Lexicons for the tests:

>>> lex = lexicon.parseLexicon(u'''
...     :- S, N, NP, PP
...
...     AdjI :: N\\N
...     AdjD :: N/N
...     AdvD :: S/S
...     AdvI :: S\\S
...     Det :: NP/N
...     PrepNPCompl :: PP/NP
...     PrepNAdjN :: S\\S/N
...     PrepNAdjNP :: S\\S/NP
...     VPNP :: S\\NP/NP
...     VPPP :: S\\NP/PP
...     VPser :: S\\NP/AdjI
...
...     auto => N
...     bebidas => N
...     cine => N
...     ley => N
...     libro => N
...     ministro => N
...     panadería => N
...     presidente => N
...     super => N
...
...     el => Det
...     la => Det
...     las => Det
...     un => Det
...
...     Ana => NP
...     Pablo => NP
...
...     y => var\\.,var/.,var
...
...     pero => (S/NP)\\(S/NP)/(S/NP)
...
...     anunció => VPNP
...     compró => VPNP
...     cree => S\\NP/S[dep]
...     desmintió => VPNP
...     lee => VPNP
...     fueron => VPPP
...
...     es => VPser
...
...     interesante => AdjD
...     interesante => AdjI
...     nueva => AdjD
...     nueva => AdjI
...
...     a => PrepNPCompl
...     en => PrepNAdjN
...     en => PrepNAdjNP
...
...     ayer => AdvI
...
...     que => (NP\\NP)/(S/NP)
...     que => S[dep]/S
...     ''')

>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
>>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
...     printCCGDerivation(parse)  # doctest: +NORMALIZE_WHITESPACE
...     break
el ministro anunció pero el presidente desmintió la nueva ley
(NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N
--------Leaf
(NP/N)
----------Leaf
N
------------------>
NP
------------------>T
(S/(S\NP))
-------------Leaf
((S\NP)/NP)
--------------------------Leaf
(((S/NP)\(S/NP))/(S/NP))
--------Leaf
(NP/N)
------------Leaf
N
-------------------->
NP
-------------------->T
(S/(S\NP))
-------------Leaf
((S\NP)/NP)
--------------------------------->B
(S/NP)
----------------------------------------------------------->
((S/NP)\(S/NP))
--------Leaf
(NP/N)
-------Leaf
(N/N)
-----Leaf
N
------------>
N
-------------------->
NP
--------------------<T
(S\(S/NP))
-------------------------------------------------------------------------------<B
(S\(S/NP))
--------------------------------------------------------------------------------------------<B
(S/NP)
-------------------------------------------------------------------------------------------------------------->
S