1
2
3
4
5
6
7
8
9 """
10 Read lines from the Prepositional Phrase Attachment Corpus.
11
12 The PP Attachment Corpus contains several files having the format:
13
14 sentence_id verb noun1 preposition noun2 attachment
15
16 For example:
17
18 42960 gives authority to administration V
19 46742 gives inventors of microchip N
20
21 The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.:
22
23 (VP gives (NP authority) (PP to administration))
24 (VP gives (NP inventors (PP of microchip)))
25
26 The corpus contains the following files:
27
28 training: training set
29 devset: development test set, used for algorithm development.
30 test: test set, used to report results
31 bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal.
32
33 Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional
34 Phrase Attachment. Proceedings of the ARPA Human Language Technology
35 Conference. [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps]
36
37 The PP Attachment Corpus is distributed with NLTK with the permission
38 of the author.
39 """
40
41 from util import *
42 from api import *
43 from nltk import tokenize
44 import codecs
45 from nltk.internals import deprecated
46
48 - def __init__(self, sent, verb, noun1, prep, noun2, attachment):
# Build one attachment record by storing every argument, unchanged, on the
# instance under an attribute of the same name.  No validation or type
# conversion happens here.
#
# The six parameters follow the column order given in the module docstring:
#   sent       -- the sentence_id column
#   verb       -- the verb column
#   noun1      -- the first noun column
#   prep       -- the preposition column
#   noun2      -- the second noun column
#   attachment -- the attachment column; the module docstring describes it
#                 as V (verb phrase) or N (noun phrase)
# NOTE(review): whether the values arrive as raw strings (e.g. sent as the
# text '42960' rather than an int) depends on the caller, which is not
# visible in this part of the file -- confirm before relying on the types.
49 self.sent = sent
50 self.verb = verb
51 self.noun1 = noun1
52 self.prep = prep
53 self.noun2 = noun2
54 self.attachment = attachment
55
57 return ('PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, '
58 'noun2=%r, attachment=%r)' %
59 (self.sent, self.verb, self.noun1, self.prep,
60 self.noun2, self.attachment))
61
63 """
64 sentence_id verb noun1 preposition noun2 attachment
65 """
70
75
76 - def raw(self, files=None):
80
87
94
95
96 @deprecated("Use .tuples() or .raw() or .attachments() instead.")
97 - def read(self, items, format='tuple'):
101
102