nltk.tag.api

1 # Natural Language Toolkit: Tagger Interface 2 # 3 # Copyright (C) 2001-2008 NLTK Project 4 # Author: Edward Loper <[email protected]> 5 # Steven Bird <[email protected]> (minor additions) 6 # URL: <http://nltk.org> 7 # For license information, see LICENSE.TXT 8 9 """ 10 Interface for tagging each token in a sentence with supplementary 11 information, such as its part of speech. 12 """ 13 14 from nltk.internals import overridden 15

class TaggerI(object):
    """
    Interface for components that assign a tag to every token in a
    list.  A tag is a case sensitive string naming some property of
    the token, for instance its part of speech or its sense.

    Certain taggers only accept tokens of a particular type.  That
    requirement is normally expressed through a sub-interface of
    C{TaggerI}.  For example, I{featureset taggers} derive from
    L{FeaturesetTaggerI} and need every token to be a I{featureset}.

    Subclasses must define:
      - either L{tag()} or L{batch_tag()} (or both)
    """

    def tag(self, tokens):
        """
        Find the most appropriate tag sequence for the given token
        sequence and return the matching list of tagged tokens.  Each
        tagged token is encoded as a tuple C{(token, tag)}.

        This default implementation wraps C{tokens} in a one-element
        batch and delegates to L{batch_tag()}, provided that
        C{overridden()} reports that method as overridden.

        @param tokens: the token sequence to tag
        @rtype: C{list} of C{(token, tag)}
        @raise NotImplementedError: if L{batch_tag()} is not reported
            as overridden, so there is nothing to delegate to
        """
        # Guard first: without an overriding batch_tag(), delegating would
        # simply bounce back into this method.
        if not overridden(self.batch_tag):
            raise NotImplementedError()
        return self.batch_tag([tokens])[0]

    def batch_tag(self, sentences):
        """
        Apply L{self.tag()} to each element of C{sentences} and return
        the results as a list, in the same order.  Equivalent to::

            [self.tag(tokens) for tokens in sentences]

        @param sentences: an iterable of token sequences
        @rtype: C{list}
        """
        tagged_sentences = []
        for tokens in sentences:
            tagged_sentences.append(self.tag(tokens))
        return tagged_sentences

50

class FeaturesetTaggerI(TaggerI):
    """
    Tagger interface whose tokens must be I{featuresets}.  A
    featureset is a dictionary mapping I{feature names} to
    I{feature values}.  More information about features and
    featuresets can be found in L{nltk.classify}.
    """

58 59

class HiddenMarkovModelTaggerTransformI(object):
    """
    An interface for a transformation to be used as the transform parameter
    of C{HiddenMarkovModelTagger}.
    """

    def __init__(self):
        """
        Refuse direct instantiation of the interface itself; subclasses
        may be constructed normally.

        @raise AssertionError: if this exact class, rather than a
            subclass, is being instantiated
        """
        # Identity check on the class, so only the bare interface is rejected.
        if self.__class__ is HiddenMarkovModelTaggerTransformI:
            # Call-style raise is valid on both Python 2 and Python 3; the
            # old ``raise Exc, "msg"`` statement form is a SyntaxError on 3.
            raise AssertionError("Interfaces can't be instantiated")

    def transform(self, labeled_symbols):
        """
        @return: a C{list} of transformed symbols
        @rtype: C{list}
        @param labeled_symbols: a C{list} of labeled untransformed symbols,
            i.e. symbols that are not (token, tag) or (word, tag)
        @type labeled_symbols: C{list}
        @raise NotImplementedError: always, in this interface; subclasses
            are expected to override this method
        """
        raise NotImplementedError()

78

Source Code for Module nltk.tag.api