1
2
3
4
5
6
7
8 from nltk.compat import *
9 from nltk.corpus.reader.util import *
10 from nltk.corpus.reader.api import *
11 from nltk.corpus.reader.xmldocs import *
12 from nltk.util import LazyConcatenation
13 from nltk.internals import ElementWrapper
14 import re, textwrap
15
17
18 - def __init__(self, root, files, wrap_etree=False, tag_mapping_function=None):
21
def xml_posts(self, files=None):
    """Return a concatenation of per-file views over the ``Post`` elements.

    One ``XMLCorpusView`` with the tag path ``'Session/Posts/Post'`` is
    created for every path produced by ``self.abspaths(files)``, and the
    resulting list is handed to ``concat``.

    When ``self._wrap_etree`` is true, ``self._wrap_elt`` is supplied as
    the third positional argument of each view; otherwise the views are
    created with the path and tag path only.
    """
    tagspec = 'Session/Posts/Post'

    # Choose how a single view is built up front; ``self._wrap_elt`` is
    # still looked up once per file, and only when wrapping is enabled.
    if self._wrap_etree:
        def make_view(path):
            return XMLCorpusView(path, tagspec, self._wrap_elt)
    else:
        def make_view(path):
            return XMLCorpusView(path, tagspec)

    return concat([make_view(path) for path in self.abspaths(files)])
30
def posts(self, files=None):
    """Return a concatenation of per-file views over post terminals.

    For every path produced by ``self.abspaths(files)`` an
    ``XMLCorpusView`` is created with the tag path
    ``'Session/Posts/Post/terminals'`` and ``self._elt_to_words`` as its
    third positional argument; the list of views is handed to ``concat``.
    """
    tagspec = 'Session/Posts/Post/terminals'
    views = []
    for path in self.abspaths(files):
        views.append(XMLCorpusView(path, tagspec, self._elt_to_words))
    return concat(views)
35
def tagged_posts(self, files=None, simplify_tags=False):
    """Return a concatenation of per-file views over tagged post terminals.

    For every path produced by ``self.abspaths(files)`` an
    ``XMLCorpusView`` is created with the tag path
    ``'Session/Posts/Post/terminals'`` and a two-argument callable as its
    third positional argument; the list of views is handed to ``concat``.

    ``simplify_tags`` is forwarded unchanged to
    ``self._elt_to_tagged_words`` on every call of that callable.
    """
    tagspec = 'Session/Posts/Post/terminals'

    # Closure that pins ``simplify_tags`` so the view can invoke it with
    # just the two positional arguments.
    def tagged_words_of(elt, handler):
        return self._elt_to_tagged_words(elt, handler, simplify_tags)

    views = []
    for path in self.abspaths(files):
        views.append(XMLCorpusView(path, tagspec, tagged_words_of))
    return concat(views)
42
43 - def words(self, files=None):
45
48
51
55
57 tagged_post = [(self._simplify_username(t.attrib['word']),
58 t.attrib['pos']) for t in elt.findall('t')]
59 if simplify_tags:
60 tagged_post = [(w, self._tag_mapping_function(t))
61 for (w,t) in tagged_post]
62 return tagged_post
63
64 @staticmethod
69