>>> from nltk.ccg import chart, lexicon
Construct a lexicon:
>>> lex = lexicon.parseLexicon(''' ... :- S, NP, N, VP ... ... Det :: NP/N ... Pro :: NP ... Modal :: S\\NP/VP ... ... TV :: VP/NP ... DTV :: TV/NP ... ... the => Det ... ... that => Det ... that => NP ... ... I => Pro ... you => Pro ... we => Pro ... ... chef => N ... cake => N ... children => N ... dough => N ... ... will => Modal ... should => Modal ... might => Modal ... must => Modal ... ... and => var\\.,var/.,var ... ... to => VP[to]/VP ... ... without => (VP\\VP)/VP[ing] ... ... be => TV ... cook => TV ... eat => TV ... ... cooking => VP[ing]/NP ... ... give => DTV ... ... is => (S\\NP)/NP ... prefer => (S\\NP)/NP ... ... which => (N\\N)/(S/NP) ... ... persuade => (VP/VP[to])/NP ... ''')>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) >>> for parse in parser.parse("you prefer that cake".split()): # doctest: +SKIP ... chart.printCCGDerivation(parse) ... break ... you prefer that cake NP ((S\NP)/NP) (NP/N) N --------------> NP ---------------------------> (S\NP) --------------------------------< S>>> for parse in parser.parse("that is the cake which you prefer".split()): # doctest: +SKIP ... chart.printCCGDerivation(parse) ... break ... that is the cake which you prefer NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP) ----->T (S/(S\NP)) ------------------>B (S/NP) ----------------------------------> (N\N) ----------------------------------------< N ------------------------------------------------> NP -------------------------------------------------------------> (S\NP) -------------------------------------------------------------------< S
Some other sentences to try:
"that is the cake which we will persuade the chef to cook"
"that is the cake which we will persuade the chef to give the children"
>>> sent = "that is the dough which you will eat without cooking".split() >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet + ... chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
Without Substitution (no output):
>>> for parse in nosub_parser.parse(sent): ... chart.printCCGDerivation(parse)
With Substitution:
>>> for parse in parser.parse(sent): # doctest: +SKIP ... chart.printCCGDerivation(parse) ... break ... that is the dough which you will eat without cooking NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) ----->T (S/(S\NP)) ------------------------------------->B ((VP\VP)/NP) ----------------------------------------------<Sx (VP/NP) ----------------------------------------------------------->B ((S\NP)/NP) ---------------------------------------------------------------->B (S/NP) --------------------------------------------------------------------------------> (N\N) ---------------------------------------------------------------------------------------< N -----------------------------------------------------------------------------------------------> NP ------------------------------------------------------------------------------------------------------------> (S\NP) ------------------------------------------------------------------------------------------------------------------< S
>>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation >>> from nltk.ccg import lexicon
Lexicons for the tests:
>>> test1_lex = ''' ... :- S,N,NP,VP ... I => NP ... you => NP ... will => S\\NP/VP ... cook => VP/NP ... which => (N\\N)/(S/NP) ... and => var\\.,var/.,var ... might => S\\NP/VP ... eat => VP/NP ... the => NP/N ... mushrooms => N ... parsnips => N''' >>> test2_lex = ''' ... :- N, S, NP, VP ... articles => N ... the => NP/N ... and => var\\.,var/.,var ... which => (N\\N)/(S/NP) ... I => NP ... anyone => NP ... will => (S/VP)\\NP ... file => VP/NP ... without => (VP\\VP)/VP[ing] ... forget => VP/NP ... reading => VP[ing]/NP ... '''
Tests handling of conjunctions. Note that while the two derivations are different, they are semantically equivalent.
>>> lex = lexicon.parseLexicon(test1_lex) >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()): ... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP I will cook and might eat the mushrooms and parsnips NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N ---------------------->B ((S\NP)/NP) ---------------------->B ((S\NP)/NP) -------------------------------------------------> (((S\NP)/NP)\.,((S\NP)/NP)) -----------------------------------------------------------------------< ((S\NP)/NP) -------------------------------------> (N\.,N) ------------------------------------------------< N --------------------------------------------------------> NP -------------------------------------------------------------------------------------------------------------------------------> (S\NP) -----------------------------------------------------------------------------------------------------------------------------------< S I will cook and might eat the mushrooms and parsnips NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N ---------------------->B ((S\NP)/NP) ---------------------->B ((S\NP)/NP) -------------------------------------------------> (((S\NP)/NP)\.,((S\NP)/NP)) -----------------------------------------------------------------------< ((S\NP)/NP) ------------------------------------------------------------------------------->B ((S\NP)/N) -------------------------------------> (N\.,N) ------------------------------------------------< N -------------------------------------------------------------------------------------------------------------------------------> (S\NP) -----------------------------------------------------------------------------------------------------------------------------------< S
Tests handling of subject extraction. It is interesting to note that the two parses are clearly semantically different.
>>> lex = lexicon.parseLexicon(test2_lex) >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) >>> for parse in parser.parse("articles which I will file and forget without reading".split()): ... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP articles which I will file and forget without reading N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) -----------------< (S/VP) ------------------------------------->B ((VP\VP)/NP) ----------------------------------------------<Sx (VP/NP) -------------------------------------------------------------------------> ((VP/NP)\.,(VP/NP)) ----------------------------------------------------------------------------------< (VP/NP) --------------------------------------------------------------------------------------------------->B (S/NP) -------------------------------------------------------------------------------------------------------------------> (N\N) -----------------------------------------------------------------------------------------------------------------------------< N articles which I will file and forget without reading N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) -----------------< (S/VP) ------------------------------------> ((VP/NP)\.,(VP/NP)) ---------------------------------------------< (VP/NP) ------------------------------------->B ((VP\VP)/NP) ----------------------------------------------------------------------------------<Sx (VP/NP) --------------------------------------------------------------------------------------------------->B (S/NP) -------------------------------------------------------------------------------------------------------------------> (N\N) -----------------------------------------------------------------------------------------------------------------------------< N
Unicode words are supported.
>>> from nltk.ccg import chart, lexicon
Lexicon for the test:
The lexicon below includes accented Spanish words (panadería, anunció, compró, desmintió); the loop breaks after the first parse, so only one derivation is printed. Unlike the earlier examples, this one carries no ``+SKIP`` directive.
>>> lex = lexicon.parseLexicon(u''' ... :- S, N, NP, PP ... ... AdjI :: N\\N ... AdjD :: N/N ... AdvD :: S/S ... AdvI :: S\\S ... Det :: NP/N ... PrepNPCompl :: PP/NP ... PrepNAdjN :: S\\S/N ... PrepNAdjNP :: S\\S/NP ... VPNP :: S\\NP/NP ... VPPP :: S\\NP/PP ... VPser :: S\\NP/AdjI ... ... auto => N ... bebidas => N ... cine => N ... ley => N ... libro => N ... ministro => N ... panadería => N ... presidente => N ... super => N ... ... el => Det ... la => Det ... las => Det ... un => Det ... ... Ana => NP ... Pablo => NP ... ... y => var\\.,var/.,var ... ... pero => (S/NP)\\(S/NP)/(S/NP) ... ... anunció => VPNP ... compró => VPNP ... cree => S\\NP/S[dep] ... desmintió => VPNP ... lee => VPNP ... fueron => VPPP ... ... es => VPser ... ... interesante => AdjD ... interesante => AdjI ... nueva => AdjD ... nueva => AdjI ... ... a => PrepNPCompl ... en => PrepNAdjN ... en => PrepNAdjNP ... ... ayer => AdvI ... ... que => (NP\\NP)/(S/NP) ... que => S[dep]/S ... ''')>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()): ... printCCGDerivation(parse) ... 
break el ministro anunció pero el presidente desmintió la nueva ley (NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N --------Leaf (NP/N) ----------Leaf N ------------------> NP ------------------>T (S/(S\NP)) -------------Leaf ((S\NP)/NP) --------------------------Leaf (((S/NP)\(S/NP))/(S/NP)) --------Leaf (NP/N) ------------Leaf N --------------------> NP -------------------->T (S/(S\NP)) -------------Leaf ((S\NP)/NP) --------------------------------->B (S/NP) -----------------------------------------------------------> ((S/NP)\(S/NP)) --------Leaf (NP/N) -------Leaf (N/N) -----Leaf N ------------> N --------------------> NP --------------------<T (S\(S/NP)) -------------------------------------------------------------------------------<B (S\(S/NP)) --------------------------------------------------------------------------------------------<B (S/NP) --------------------------------------------------------------------------------------------------------------> S