import re
import string

from nltk import Tree
import nltk.tag.util

from api import *


from nltk import evaluate

23 """
24 Score the accuracy of the chunker against the gold standard.
25 Strip the chunk information from the gold standard and rechunk it using
26 the chunker, then compute the accuracy score.
27
28 @type chunker: C{ChunkParserI}
29 @param chunker: The chunker being evaluated.
30 @type gold: C{tree}
31 @param gold: The chunk structures to score the chunker on.
32 @rtype: C{float}
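
    A typical use, assuming C{chunker} is a trained C{ChunkParserI} and
    taking the gold-standard trees from the CoNLL-2000 corpus reader,
    is::

        >>> from nltk.corpus import conll2000
        >>> gold = conll2000.chunked_sents('test.txt')[:50]
        >>> score = accuracy(chunker, gold)   # a float between 0 and 1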
33 """
    gold_tags = []
    test_tags = []
    for gold_tree in gold:
        test_tree = chunker.parse(gold_tree.flatten())
        gold_tags += tree2conlltags(gold_tree)
        test_tags += tree2conlltags(test_tree)

    return evaluate.accuracy(gold_tags, test_tags)


class ChunkScore(object):
    """
    A utility class for scoring chunk parsers.  C{ChunkScore} can
    evaluate a chunk parser's output, based on a number of statistics
    (precision, recall, f-measure, missed chunks, incorrect chunks).
    It can also combine the scores from the parsing of multiple texts;
    this makes it significantly easier to evaluate a chunk parser that
    operates one sentence at a time.

    Texts are evaluated with the C{score} method.  The results of
    evaluation can be accessed via a number of accessor methods, such
    as C{precision} and C{f_measure}.  A typical use of the
    C{ChunkScore} class is::

        >>> chunkscore = ChunkScore()
        >>> for correct in correct_sentences:
        ...     guess = chunkparser.parse(correct.leaves())
        ...     chunkscore.score(correct, guess)
        >>> print 'F Measure:', chunkscore.f_measure()
        F Measure: 0.823

    @ivar kwargs: Keyword arguments:

        - max_tp_examples: The maximum number of actual examples of true
          positives to record.  This affects the C{correct} member
          function: C{correct} will not return more than this number
          of true positive examples.  This does *not* affect any of
          the numerical metrics (precision, recall, or f-measure).

        - max_fp_examples: The maximum number of actual examples of false
          positives to record.  This affects the C{incorrect} member
          function and the C{guessed} member function: C{incorrect}
          will not return more than this number of examples, and
          C{guessed} will not return more than this number of true
          positive examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure).

        - max_fn_examples: The maximum number of actual examples of false
          negatives to record.  This affects the C{missed} member
          function and the C{correct} member function: C{missed}
          will not return more than this number of examples, and
          C{correct} will not return more than this number of true
          negative examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure).

        - chunk_node: A regular expression indicating which chunks
          should be compared.  Defaults to C{'.*'} (i.e., all chunks).

    @type _tp: C{list} of C{Token}
    @ivar _tp: List of true positives
    @type _fp: C{list} of C{Token}
    @ivar _fp: List of false positives
    @type _fn: C{list} of C{Token}
    @ivar _fn: List of false negatives

    @type _tp_num: C{int}
    @ivar _tp_num: Number of true positives
    @type _fp_num: C{int}
    @ivar _fp_num: Number of false positives
    @type _fn_num: C{int}
    @ivar _fn_num: Number of false negatives.
    """
    def __init__(self, **kwargs):
        self._correct = set()
        self._guessed = set()
        self._tp = set()
        self._fp = set()
        self._fn = set()
        self._max_tp = kwargs.get('max_tp_examples', 100)
        self._max_fp = kwargs.get('max_fp_examples', 100)
        self._max_fn = kwargs.get('max_fn_examples', 100)
        self._chunk_node = kwargs.get('chunk_node', '.*')
        self._tp_num = 0
        self._fp_num = 0
        self._fn_num = 0
        self._count = 0

        self._measuresNeedUpdate = False

    def _updateMeasures(self):
        if self._measuresNeedUpdate:
            self._tp = self._guessed & self._correct
            self._fn = self._correct - self._guessed
            self._fp = self._guessed - self._correct
            self._tp_num = len(self._tp)
            self._fp_num = len(self._fp)
            self._fn_num = len(self._fn)
            self._measuresNeedUpdate = False

    def score(self, correct, guessed):
        """
        Given a correctly chunked sentence, score another chunked
        version of the same sentence.

        @type correct: chunk structure
        @param correct: The known-correct ("gold standard") chunked
            sentence.
        @type guessed: chunk structure
        @param guessed: The chunked sentence to be scored.
        """
        self._correct |= _chunksets(correct, self._count, self._chunk_node)
        self._guessed |= _chunksets(guessed, self._count, self._chunk_node)
        self._count += 1
        self._measuresNeedUpdate = True

    def precision(self):
        """
        @return: the overall precision for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}
        """
        self._updateMeasures()
        div = self._tp_num + self._fp_num
        if div == 0: return 0
        else: return float(self._tp_num) / div

    def recall(self):
        """
        @return: the overall recall for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}
        """
        self._updateMeasures()
        div = self._tp_num + self._fn_num
        if div == 0: return 0
        else: return float(self._tp_num) / div

    def f_measure(self, alpha=0.5):
        """
        @return: the overall F measure for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}

        @param alpha: the relative weighting of precision and recall.
            Larger alpha biases the score towards the precision value,
            while smaller alpha biases the score towards the recall
            value.  C{alpha} should have a value in the range [0,1].
        @type alpha: C{float}
        """
        self._updateMeasures()
        p = self.precision()
        r = self.recall()
        if p == 0 or r == 0:
            return 0
        return 1/(alpha/p + (1-alpha)/r)

    def missed(self):
        """
        @rtype: C{list} of chunks
        @return: the chunks which were included in the
            correct chunk structures, but not in the guessed chunk
            structures, listed in input order.
        """
        self._updateMeasures()
        chunks = list(self._fn)
        return [c[1] for c in chunks]

    def incorrect(self):
        """
        @rtype: C{list} of chunks
        @return: the chunks which were included in the
            guessed chunk structures, but not in the correct chunk
            structures, listed in input order.
        """
        self._updateMeasures()
        chunks = list(self._fp)
        return [c[1] for c in chunks]

    def correct(self):
        """
        @rtype: C{list} of chunks
        @return: the chunks which were included in the correct
            chunk structures, listed in input order.
        """
        chunks = list(self._correct)
        return [c[1] for c in chunks]

    def guessed(self):
        """
        @rtype: C{list} of chunks
        @return: the chunks which were included in the guessed
            chunk structures, listed in input order.
        """
        chunks = list(self._guessed)
        return [c[1] for c in chunks]

    def __len__(self):
        self._updateMeasures()
        return self._tp_num + self._fn_num

    def __repr__(self):
        """
        @rtype: C{String}
        @return: a concise representation of this C{ChunkScore}.
        """
        return '<ChunkScore of %d chunks>' % len(self)

    def __str__(self):
        """
        @rtype: C{String}
        @return: a verbose representation of this C{ChunkScore}.
            This representation includes the precision, recall, and
            f-measure scores.  For other information about the score,
            use the accessor methods (e.g., C{missed()} and
            C{incorrect()}).
        """
        return ("ChunkParse score:\n" +
                ("    Precision: %5.1f%%\n" % (self.precision()*100)) +
                ("    Recall:    %5.1f%%\n" % (self.recall()*100)) +
                ("    F-Measure: %5.1f%%" % (self.f_measure()*100)))

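
# Helper used by ChunkScore.score().  This is a minimal sketch: it collects
# the chunks of a chunk structure as hashable (location, chunk) pairs so that
# gold and guessed chunks can be compared with set operations; the exact
# chunk representation stored in the pair is an assumption of this sketch.
def _chunksets(t, count, chunk_node):
    pos = 0
    chunks = []
    for child in t:
        if isinstance(child, Tree):
            if re.match(chunk_node, child.node):
                # Key each chunk by (sentence count, token position) and
                # store its leaves as a tuple so the pair is hashable.
                chunks.append(((count, pos), tuple(child.leaves())))
            pos += len(child.leaves())
        else:
            pos += 1
    return set(chunks)
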
_LINE_RE = re.compile(r'(\S+)\s+(\S+)\s+([IOB])-?(\S+)?')
def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), top_node="S"):
    """
    Convert a CoNLL IOB string into a tree.  Uses the specified chunk types
    (defaults to NP, PP and VP), and creates a tree rooted at a node
    labeled S (by default).

    @param s: The CoNLL string to be converted.
    @type s: C{string}
    @param chunk_types: The chunk types to be converted.
    @type chunk_types: C{tuple}
    @param top_node: The node label to use for the root.
    @type top_node: C{string}
    @return: A chunk structure for a single sentence
        encoded in the given CONLL 2000 style string.
    @rtype: L{Tree}
    """

    stack = [Tree(top_node, [])]

    for lineno, line in enumerate(s.split('\n')):
        if not line.strip(): continue

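        # Decode the line into its word, part-of-speech tag, IOB state
        # (B, I or O) and chunk type.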
        match = _LINE_RE.match(line)
        if match is None:
            raise ValueError('Error on line %d' % lineno)
        (word, tag, state, chunk_type) = match.groups()

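        # Treat chunk types we were not asked for as if they were
        # outside any chunk.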
        if (chunk_types is not None and
            chunk_type not in chunk_types):
            state = 'O'

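        # For B and O tags (and I tags whose chunk type does not match
        # the open chunk), close any chunk that is currently open.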
        mismatch_I = state == 'I' and chunk_type != stack[-1].node
        if state in 'BO' or mismatch_I:
            if len(stack) == 2: stack.pop()

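        # For B tags (and mismatched I tags), start a new chunk.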
        if state == 'B' or mismatch_I:
            chunk = Tree(chunk_type, [])
            stack[-1].append(chunk)
            stack.append(chunk)

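        # Add the word token to the current chunk (or to the top-level
        # tree, if no chunk is open).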
        stack[-1].append((word, tag))

    return stack[0]

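
# tree2conlltags() is used by accuracy() and tree2conllstr(); a minimal
# sketch follows, assuming flat (unnested) chunk structures whose leaves
# are (word, tag) pairs.
def tree2conlltags(t):
    """
    Convert a tree to the CoNLL IOB tag format.

    @param t: The tree to be converted.
    @type t: C{Tree}
    @return: A list of 3-tuples containing word, tag and IOB tag.
    @rtype: C{list} of C{tuple}
    """
    tags = []
    for child in t:
        if isinstance(child, Tree):
            # The first token of a chunk is tagged B-<type>, the rest I-<type>.
            prefix = 'B-'
            for (word, tag) in child.leaves():
                tags.append((word, tag, prefix + child.node))
                prefix = 'I-'
        else:
            # Tokens outside any chunk are tagged O.
            tags.append((child[0], child[1], 'O'))
    return tags
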


def tree2conllstr(t):
    """
    Convert a tree to the CoNLL IOB string format.

    @param t: The tree to be converted.
    @type t: C{Tree}
    @return: A multiline string where each line contains a word, tag and IOB tag.
    @rtype: C{string}
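
    For example, a single NP chunk produces lines like::

        the DT B-NP
        cat NN I-NP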
    """
    lines = [string.join(token) for token in tree2conlltags(t)]
    return '\n'.join(lines)


_IEER_DOC_RE = re.compile(r'<DOC>\s*'
                          r'(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?'
                          r'(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?'
                          r'(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?'
                          r'<BODY>\s*'
                          r'(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?'
                          r'<TEXT>(?P<text>.*?)</TEXT>\s*'
                          r'</BODY>\s*</DOC>\s*', re.DOTALL)

_IEER_TYPE_RE = re.compile(r'<b_\w+\s+[^>]*?type="(?P<type>\w+)"')

def _ieer_read_text(s, top_node):
    stack = [Tree(top_node, [])]

    if s is None:
        return []
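    # Walk over SGML-style tags and word tokens: a tag starting with
    # '<b_' opens a chunk of the type named in its type attribute, and
    # a tag starting with '<e_' closes the current chunk.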
    for piece_m in re.finditer(r'<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                m = _IEER_TYPE_RE.match(piece)
                if m is None: print 'XXXX', piece
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                stack.pop()
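            # Otherwise the piece is an ordinary word; add it to the
            # current chunk (or to the top-level tree).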
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]

def ieerstr2tree(s, chunk_types=['LOCATION', 'ORGANIZATION', 'PERSON',
                 'DURATION', 'DATE', 'CARDINAL', 'PERCENT', 'MONEY',
                 'MEASURE'], top_node="S"):
    """
    Convert a string of chunked tagged text in the IEER named
    entity format into a chunk structure.  Chunks are of several
    types, LOCATION, ORGANIZATION, PERSON, DURATION, DATE, CARDINAL,
    PERCENT, MONEY, and MEASURE.

    @return: A chunk structure containing the chunked tagged text that is
        encoded in the given IEER style string.
    @rtype: L{Tree}
    """
    m = _IEER_DOC_RE.match(s)
    if m:
        return {
            'text': _ieer_read_text(m.group('text'), top_node),
            'docno': m.group('docno'),
            'doctype': m.group('doctype'),
            'date_time': m.group('date_time'),
            'headline': _ieer_read_text(m.group('headline'), top_node),
            }
    else:
        return _ieer_read_text(s, top_node)
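
# Example usage (illustrative): a plain string with IEER-style entity tags
# is converted to a chunk structure, e.g.
#
#     >>> tree = ieerstr2tree('Paris is the capital of '
#     ...                     '<b_ENAMEX type="LOCATION">France<e_ENAMEX> .')
#
# yields an S tree whose children are the word tokens, with 'France'
# wrapped in a LOCATION chunk.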


def demo():
    s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
    from nltk import chunk
    t = chunk.tagstr2tree(s, chunk_node='NP')
    print t.pprint()
    print

    s = """
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
"""

    conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP'))
    print conll_tree.pprint()

    print "CoNLL output:"
    print chunk.tree2conllstr(conll_tree)
    print

if __name__ == '__main__':
    demo()