Code Coverage for nltk.corpus.reader.string_category
Untested Functions
"""
Read tuples from a corpus consisting of categorized strings.
For example, from the question classification corpus:
NUM:dist How far is it from Denver to Aspen ?
LOC:city What county is Modesto , California in ?
HUM:desc Who was Galileo ?
DESC:def What is an atom ?
NUM:date When did Hawaii become a state ?
"""
from util import *
from api import *
import os
class StringCategoryCorpusReader(CorpusReader):
    """
    Corpus reader for files in which each line holds a category label,
    a delimiter, and the string that belongs to that category.
    """

    def __init__(self, root, files, delimiter=' ', encoding=None):
        """
        @param root: The root directory for this corpus.
        @param files: A list or regexp specifying the files in this corpus.
        @param delimiter: Field delimiter; each line is split on its first
            occurrence.
        @param encoding: Forwarded unchanged to C{CorpusReader.__init__}.
        """
        CorpusReader.__init__(self, root, files, encoding)
        self._delimiter = delimiter

    def tuples(self, files=None):
        """
        @param files: A single file name, a list of file names, or C{None}
            to use C{self._files} (the same convention as L{raw}).
        @return: the concatenation of one corpus view per file, whose
            items are the tuples built by L{_read_tuple_block}.
        """
        # Normalise the argument exactly as raw() does, so both accessors
        # accept the same kinds of input.
        if files is None:
            files = self._files
        elif isinstance(files, basestring):
            files = [files]
        return concat([StreamBackedCorpusView(filename,
                                              self._read_tuple_block,
                                              encoding=enc)
                       for (filename, enc) in self.abspaths(files)])

    def raw(self, files=None):
        """
        @param files: A single file name, a list of file names, or C{None}
            to use C{self._files}.
        @return: the text contents of the given files, as a single string.
        """
        if files is None:
            files = self._files
        elif isinstance(files, basestring):
            files = [files]
        contents = []
        for f in files:
            stream = self.open(f)
            # Close every stream explicitly, even if read() raises, so
            # that no file handle is left open.
            try:
                contents.append(stream.read())
            finally:
                stream.close()
        return concat(contents)

    def _read_tuple_block(self, stream):
        """
        Read one line from C{stream} and split it once on the delimiter.

        @param stream: An object providing C{readline()}.
        @return: a one-element list holding the tuple of fields, or an
            empty list when the stripped line is empty.  A line that does
            not contain the delimiter yields a 1-tuple.
        """
        line = stream.readline().strip()
        if line:
            # maxsplit=1: only the first delimiter separates the fields;
            # later occurrences stay inside the second field.
            return [tuple(line.split(self._delimiter, 1))]
        else:
            return []