1
2
3
4
5
6
7
8
9 """
10 Functions for X{tokenizing}, i.e., dividing text strings into
11 substrings.
12 """
13
14 from simple import *
15 from regexp import *
16 from punkt import *
17 from sexpr import *
18 from nltk.internals import deprecated
19
# Public API of the tokenize package.  The listed tokenizer classes and
# *_tokenize helpers are presumably re-exported by the wildcard imports
# above (simple/regexp/punkt/sexpr) — TODO confirm against those modules.
20 __all__ = ['WhitespaceTokenizer', 'SpaceTokenizer', 'TabTokenizer',
21 'LineTokenizer', 'RegexpTokenizer', 'BlanklineTokenizer',
22 'WordPunctTokenizer', 'WordTokenizer', 'blankline_tokenize',
23 'wordpunct_tokenize', 'regexp_tokenize', 'word_tokenize',
24 'SExprTokenizer', 'sexpr_tokenize', 'line_tokenize',
25 'PunktWordTokenizer', 'punkt_word_tokenize',
26 'PunktSentenceTokenizer',
27 ]
28
29
30
31
32
# --- Deprecated wrapper section -------------------------------------
# Each @deprecated(...) decorator below must be followed by a function
# definition, but this listing's embedded line numbering skips two lines
# after every decorator (35-36, 40-41, 44-45, 49-50, 54-55) — the
# decorated `def` lines are missing from this view of the file.
# NOTE(review): recover the missing definitions from the original
# source before treating this section as runnable; do not reconstruct
# them by guesswork.
33 @deprecated("Use nltk.blankline_tokenize() or "
34 "nltk.BlanklineTokenizer instead.")
37

38 @deprecated("Use nltk.wordpunct_tokenize() or "
39 "nltk.WordPunctTokenizer instead.")
42

43 @deprecated("Use str.split() or nltk.WhitespaceTokenizer instead.")
46

47 @deprecated("Use nltk.word_tokenize() or "
48 "nltk.WordTokenizer instead.")
51

52 @deprecated("Use nltk.line_tokenize() or "
53 "nltk.LineTokenizer instead.")
56
57
58