1
2
3 from nltk.corpus import gutenberg, genesis, inaugural, nps_chat
4 from nltk.text import Text
5
# Load five sample texts into the module-level names text1..text5 and
# announce each one on stdout as soon as it has been constructed.
#
# Every print call below takes exactly one parenthesized argument, so the
# output is the same whether ``print`` is parsed as a statement (Python 2)
# or called as a function (Python 3).  Bare ``print "..."`` statements are a
# SyntaxError on Python 3, and ``print("a", b)`` would print a tuple on
# Python 2, hence the single pre-formatted string per call.
print("Introductory Examples for the NLTK Book")
print("")  # emits one empty line, like a bare ``print`` statement
print("Loading 5 texts...")

# text1 and text2 are built without an explicit ``name``; the label printed
# is whatever ``Text`` assigns to ``.name`` on its own.
text1 = Text(gutenberg.words('melville-moby_dick.txt'))
print("text1: %s" % text1.name)

text2 = Text(gutenberg.words('austen-sense.txt'))
print("text2: %s" % text2.name)

# The remaining texts are given an explicit display name.
text3 = Text(genesis.words('english-kjv.txt'), name="The Book of Genesis")
print("text3: %s" % text3.name)

# No file id is passed, so the words come from the inaugural reader's
# default selection.
text4 = Text(inaugural.words(), name="Inaugural Address Corpus")
print("text4: %s" % text4.name)

# text5 is built from three chat-log files passed as a single list.
text5 = Text(
    nps_chat.words([
        '10-26-teens_706posts.xml',
        '11-08-teens_706posts.xml',
        '11-09-teens_706posts.xml',
    ]),
    name="Teen Chat Corpus",
)
print("text5: %s" % text5.name)
27