A corpus reader for CoNLL-style files. These files consist of a
series of sentences, separated by blank lines. Each sentence is encoded
using a table (or grid) of values, where each line corresponds to
a single word, and each column corresponds to an annotation type. The
set of columns used by CoNLL-style files can vary from corpus to corpus;
the ConllCorpusReader constructor therefore takes an argument,
columntypes, which is used to specify the columns
that are used by a given corpus.
|
__init__(self,
root,
files,
columntypes,
chunk_types=None,
top_node='S',
pos_in_tree=False,
srl_includes_roleset=True,
encoding=None,
tree_class=<class 'nltk.tree.Tree'>)
x.__init__(...) initializes x; see x.__class__.__doc__ for signature |
source code
|
|
|
|
|
|
|
|
|
|
|
|
|
chunked_words(self,
files=None,
chunk_types=None) |
source code
|
|
|
chunked_sents(self,
files=None,
chunk_types=None) |
source code
|
|
|
parsed_sents(self,
files=None,
pos_in_tree=None) |
source code
|
|
|
|
|
srl_instances(self,
files=None,
pos_in_tree=None,
flatten=True) |
source code
|
|
list of tuple
|
|
list of list
|
|
|
|
|
|
|
|
|
|
|
|
|
_get_chunked_words(self,
grid,
chunk_types) |
source code
|
|
|
|
|
_get_srl_spans(self,
grid)
list of list of ((start, end), tag) tuples |
source code
|
|
|
_get_srl_instances(self,
grid,
pos_in_tree) |
source code
|
|
|
|
Inherited from api.CorpusReader:
__repr__,
abspath,
abspaths,
encoding,
files,
open
Inherited from object:
__delattr__,
__getattribute__,
__hash__,
__new__,
__reduce__,
__reduce_ex__,
__setattr__,
__str__
|
|
|
|
|
|
|
|
|
Inherited from api.CorpusReader:
filenames
|
|
WORDS = 'words'
column type for words
|
|
POS = 'pos'
column type for part-of-speech tags
|
|
TREE = 'tree'
column type for parse trees
|
|
CHUNK = 'chunk'
column type for chunk structures
|
|
NE = 'ne'
column type for named entities
|
|
SRL = 'srl'
column type for semantic role labels
|
|
IGNORE = 'ignore'
column type for column that should be ignored
|
|
COLUMN_TYPES = ('words', 'pos', 'tree', 'chunk', 'ne', 'srl', ...
A list of all column types supported by the conll corpus reader.
|