1
2
3
4
5
6
7
8
9
10 """
11 Utility functions for evaluating processing modules.
12 """
13
14 import sys
15 import math
16 import random
17
18 try:
19 from scipy.stats.stats import betai
20 except ImportError:
21 betai = None
22
23 from util import LazyConcatenation, LazyMap, LazyZip
24 from probability import FreqDist
25
def accuracy(reference, test):
    """
    Given a list of reference values and a corresponding list of test
    values, return the percentage of corresponding values that are
    equal.  In particular, return the percentage of indices
    C{0<i<=len(test)} such that C{test[i] == reference[i]}.

    @type reference: C{list}
    @param reference: An ordered list of reference values.
    @type test: C{list}
    @param test: A list of values to compare against the corresponding
        reference values.
    @raise ValueError: If C{reference} and C{test} do not have the
        same length.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The inputs are required to be same-length sequences, so the
    # builtin zip suffices here; no need for the lazy project helper.
    num_correct = sum(1 for x, y in zip(reference, test) if x == y)
    return float(num_correct) / len(reference)
48
def precision(reference, test):
    """
    Given a set of reference values and a set of test values, return
    the percentage of test values that appear in the reference set.
    In particular, return |C{reference}S{cap}C{test}|/|C{test}|.
    If C{test} is empty, then return C{None}.

    @type reference: C{Set}
    @param reference: A set of reference values.
    @type test: C{Set}
    @param test: A set of values to compare against the reference set.
    @rtype: C{float} or C{None}
    """
    # Precision is undefined for an empty test set.
    if len(test) == 0:
        return None
    overlap = reference.intersection(test)
    return len(overlap) / float(len(test))
66
def recall(reference, test):
    """
    Given a set of reference values and a set of test values, return
    the percentage of reference values that appear in the test set.
    In particular, return |C{reference}S{cap}C{test}|/|C{reference}|.
    If C{reference} is empty, then return C{None}.

    @type reference: C{Set}
    @param reference: A set of reference values.
    @type test: C{Set}
    @param test: A set of values to compare against the reference set.
    @rtype: C{float} or C{None}
    """
    # Recall is undefined for an empty reference set.
    if not reference:
        return None
    matched = reference.intersection(test)
    return len(matched) / float(len(reference))
84
def f_measure(reference, test, alpha=0.5):
    """
    Given a set of reference values and a set of test values, return
    the f-measure of the test values, when compared against the
    reference values.  The f-measure is the harmonic mean of the
    L{precision} and L{recall}, weighted by C{alpha}.  In particular,
    given the precision M{p} and recall M{r} defined by:
        - M{p} = |C{reference}S{cap}C{test}|/|C{test}|
        - M{r} = |C{reference}S{cap}C{test}|/|C{reference}|
    The f-measure is:
        - 1/(C{alpha}/M{p} + (1-C{alpha})/M{r})

    If either C{reference} or C{test} is empty, then C{f_measure}
    returns C{None}.

    @type reference: C{Set}
    @param reference: A set of reference values.
    @type test: C{Set}
    @param test: A set of values to compare against the reference set.
    @rtype: C{float} or C{None}
    """
    prec = precision(reference, test)
    rec = recall(reference, test)
    # Either component undefined (empty set) -> f-measure undefined.
    if prec is None or rec is None:
        return None
    # Either component zero would make the harmonic mean divide by zero.
    if prec == 0 or rec == 0:
        return 0
    return 1.0 / (alpha / prec + (1 - alpha) / rec)
113
def log_likelihood(reference, test):
    """
    Given a list of reference values and a corresponding list of test
    probability distributions, return the average log likelihood of
    the reference values, given the probability distributions.

    @param reference: A list of reference values
    @type reference: C{list}
    @param test: A list of probability distributions over values to
        compare against the corresponding reference values.
    @type test: C{list} of L{ProbDistI}
    @raise ValueError: If C{reference} and C{test} do not have the
        same length.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")

    # Score each reference value under its paired distribution, then
    # average over all positions.
    scores = [dist.logprob(value) for (value, dist) in zip(reference, test)]
    return sum(scores) / len(reference)
133
def approxrand(a, b, **kwargs):
    """
    Returns an approximate significance level between two lists of
    independently generated test values.

    Approximate randomization calculates significance by randomly drawing
    from a sample of the possible permutations. At the limit of the number
    of possible permutations, the significance level is exact. The
    approximate significance level is the sample mean number of times the
    statistic of the permutated lists varies from the actual statistic of
    the unpermuted argument lists.

    @return: a tuple containing an approximate significance level, the count
             of the number of times the pseudo-statistic varied from the
             actual statistic, and the number of shuffles
    @rtype: C{tuple}
    @param a: a list of test values
    @type a: C{list}
    @param b: another list of independently generated test values
    @type b: C{list}
    @kwarg shuffles: the number of random permutations to sample
        (default 999)
    @kwarg statistic: a callable computing the test statistic of a list
        (default: the sample mean)
    @kwarg verbose: if true, print progress to stdout (default False)
    """
    shuffles = kwargs.get('shuffles', 999)
    # There is no point in sampling more permutations than there are
    # distinct orderings of the pooled data, i.e. (len(a) + len(b))!.
    shuffles = \
        min(shuffles, reduce(lambda x, y: x * y, xrange(1, len(a) + len(b) + 1)))
    stat = kwargs.get('statistic', lambda lst: float(sum(lst)) / len(lst))
    verbose = kwargs.get('verbose', False)

    if verbose:
        print 'shuffles: %d' % shuffles

    # The observed (absolute) difference of the statistic between a and b.
    actual_stat = math.fabs(stat(a) - stat(b))

    if verbose:
        print 'actual statistic: %f' % actual_stat
        print '-' * 60

    # Count of permutations at least as extreme as the observed one.
    # NOTE(review): starts at a tiny non-zero value, presumably to keep
    # downstream ratios (e.g. betai below) away from exactly zero --
    # TODO confirm.
    c = 1e-100
    lst = LazyConcatenation([a, b])
    indices = range(len(a) + len(b))

    for i in range(shuffles):
        if verbose and i % 10 == 0:
            print 'shuffle: %d' % i

        # Shuffle the pooled indices in place, then split them back
        # into two pseudo-samples of the original sizes.
        random.shuffle(indices)

        pseudo_stat_a = stat(LazyMap(lambda i: lst[i], indices[:len(a)]))
        pseudo_stat_b = stat(LazyMap(lambda i: lst[i], indices[len(a):]))
        pseudo_stat = math.fabs(pseudo_stat_a - pseudo_stat_b)

        if pseudo_stat >= actual_stat:
            c += 1

        if verbose and i % 10 == 0:
            print 'pseudo-statistic: %f' % pseudo_stat
            print 'significance: %f' % (float(c + 1) / (i + 1))
            print '-' * 60

    # Add-one smoothed permutation p-value.
    significance = float(c + 1) / (shuffles + 1)

    if verbose:
        print 'significance: %f' % significance
        # betai may be None if the scipy import at the top of this
        # module failed.
        if betai:
            for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
                print "prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi))

    return (significance, c, shuffles)
202
203
class ConfusionMatrix(object):
    """
    The confusion matrix between a list of reference values and a
    corresponding list of test values.  Entry [M{r},M{t}] of this
    matrix is a count of the number of times that the reference value
    M{r} corresponds to the test value M{t}.  E.g.:

        >>> ref = 'DET NN VB DET JJ NN NN IN DET NN'.split()
        >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
        >>> cm = ConfusionMatrix(ref, test)
        >>> print cm['NN', 'NN']
        3

    Note that the diagonal entries (M{Ri}=M{Tj}) of this matrix
    corresponds to correct values; and the off-diagonal entries
    correspond to incorrect values.
    """
    def __init__(self, reference, test, sort_by_count=False):
        """
        Construct a new confusion matrix from a list of reference
        values and a corresponding list of test values.

        @type reference: C{list}
        @param reference: An ordered list of reference values.
        @type test: C{list}
        @param test: A list of values to compare against the
            corresponding reference values.
        @raise ValueError: If C{reference} and C{test} do not have
            the same length.
        """
        if len(reference) != len(test):
            raise ValueError('Lists must have the same length.')

        # Get a sorted list of all values seen in either list; with
        # sort_by_count, the most frequent values come first.
        if sort_by_count:
            ref_fdist = FreqDist(reference)
            test_fdist = FreqDist(test)
            def key(v): return -(ref_fdist[v]+test_fdist[v])
            values = sorted(set(reference+test), key=key)
        else:
            values = sorted(set(reference+test))

        # Construct a value -> index dictionary.
        indices = dict((val,i) for (i,val) in enumerate(values))

        # Tabulate the counts (row = reference, column = test), keeping
        # track of the largest single count for later pretty-printing.
        confusion = [[0 for val in values] for val in values]
        max_conf = 0
        for w,g in zip(reference, test):
            confusion[indices[w]][indices[g]] += 1
            max_conf = max(max_conf, confusion[indices[w]][indices[g]])

        #: A sorted list of all values in C{reference} or C{test}.
        self._values = values
        #: A dictionary mapping values to their indices.
        self._indices = indices
        #: The confusion matrix itself (a list of lists of counts).
        self._confusion = confusion
        #: The greatest count in the matrix (for column sizing).
        self._max_conf = max_conf
        #: The total number of compared values.
        self._total = len(reference)
        #: The number of correct (on-diagonal) values.
        self._correct = sum(confusion[i][i] for i in range(len(values)))

    def __getitem__(self, (li,lj)):
        """
        @return: The number of times that value C{li} was expected and
        value C{lj} was given.
        @rtype: C{int}
        """
        i = self._indices[li]
        j = self._indices[lj]
        return self._confusion[i][j]

    def __repr__(self):
        return '<ConfusionMatrix: %s/%s correct>' % (self._correct,
                                                     self._total)

    def __str__(self):
        return self.pp()

    def pp(self, show_percents=False, values_in_chart=True):
        """
        @return: A multi-line string representation of this confusion
        matrix.
        @todo: add marginals?
        """
        confusion = self._confusion

        if values_in_chart:
            values = self._values
        else:
            values = range(len(self._values))

        # Construct a format string for row values and for entries.
        valuelen = max(len(str(val)) for val in values)
        value_format = '%' + `valuelen` + 's | '

        if show_percents:
            entrylen = 6
            entry_format = '%5.1f%%'
            zerostr = '     .'
        else:
            entrylen = len(`self._max_conf`)
            entry_format = '%' + `entrylen` + 'd'
            zerostr = ' '*(entrylen-1) + '.'

        # Write the column headers vertically, one character per line,
        # above their columns.
        value_strings = [str(val) for val in values]
        s = ''
        for i in range(valuelen):
            s += (' '*valuelen)+' |'
            for val in value_strings:
                if i >= valuelen-len(val):
                    s += val[i-valuelen+len(val)].rjust(entrylen+1)
                else:
                    s += ' '*(entrylen+1)
            s += ' |\n'

        # Write a dividing line.
        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))

        # Write the entries; the diagonal (correct) entry of each row
        # is wrapped in angle brackets.
        for i in range(len(values)):
            s += value_format % values[i]
            for j in range(len(values)):
                if confusion[i][j] == 0:
                    s += zerostr
                elif show_percents:
                    s += entry_format % (100.0*confusion[i][j]/self._total)
                else:
                    s += entry_format % confusion[i][j]
                if i == j:
                    prevspace = s.rfind(' ')
                    s = s[:prevspace] + '<' + s[prevspace+1:] + '>'
                else: s += ' '
            s += '|\n'

        # Write a dividing line.
        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))

        # Write a key, plus an index legend if values were suppressed.
        s += '(row = reference; col = test)\n'
        if not values_in_chart:
            s += 'Value key:\n'
            for i, value in enumerate(self._values):
                s += '%6d: %s\n' % (i, value)

        return s

    def key(self):
        """
        @return: A multi-line string listing each index used in this
            matrix together with the value it stands for.
        """
        values = self._values
        # NOTE(review): the local name 'str' shadows the builtin for
        # the rest of this method.
        str = 'Value key:\n'
        indexlen = len(`len(values)-1`)
        key_format = '  %'+`indexlen`+'d: %s\n'
        for i in range(len(values)):
            str += key_format % (i, values[i])

        return str
364
365
366
367
368
369
370
def windowdiff(seg1, seg2, k, boundary="1"):
    """
    Compute the windowdiff score for a pair of segmentations.  A segmentation is any sequence
    over a vocabulary of two items (e.g. "0", "1"), where the specified boundary value is used
    to mark the edge of a segmentation.

    @param seg1: a segmentation
    @type seg1: C{string} or C{list}
    @param seg2: a segmentation
    @type seg2: C{string} or C{list}
    @param k: window width
    @type k: C{int}
    @param boundary: boundary value
    @type boundary: C{string} or C{int} or C{bool}
    @rtype: C{int}
    @raise ValueError: If the segmentations have unequal length.
    """
    if len(seg1) != len(seg2):
        # Call-style raise, for consistency with the other raises in
        # this module (the 'raise E, msg' form is Python-2-only).
        raise ValueError("Segmentations have unequal length")
    # Slide a window of width k+1 over both segmentations, summing the
    # absolute difference in boundary counts at each position.
    wd = 0
    for i in range(len(seg1) - k):
        wd += abs(seg1[i:i+k+1].count(boundary) - seg2[i:i+k+1].count(boundary))
    return wd
394
395
396
398 lev = []
399 for i in range(len1):
400 lev.append([0] * len2)
401 for i in range(len1):
402 lev[i][0] = i
403 for j in range(len2):
404 lev[0][j] = j
405 return lev
406
408 a = lev[i-1][j ] + 1
409 b = lev[i-1][j-1] + (c1 != c2)
410 c = lev[i ][j-1] + 1
411 lev[i][j] = min(a,b,c)
412
def edit_dist(s1, s2):
    """
    Calculate the Levenshtein edit-distance between two strings.
    The edit distance is the number of characters that need to be
    substituted, inserted, or deleted, to transform s1 into s2.  For
    example, transforming "rain" to "shine" requires three steps,
    consisting of two substitutions and one insertion:
    "rain" -> "sain" -> "shin" -> "shine".  These operations could have
    been done in other orders, but at least three steps are needed.

    @param s1, s2: The strings to be analysed
    @type s1: C{string}
    @type s2: C{string}
    @rtype C{int}
    """
    n1, n2 = len(s1), len(s2)
    table = _edit_dist_init(n1 + 1, n2 + 1)

    # Fill the table row by row; each cell depends only on the cells
    # above and to its left.
    for row in range(n1):
        for col in range(n2):
            _edit_dist_step(table, row + 1, col + 1, s1[row], s2[col])
    return table[n1][n2]
437
438
def demo():
    """
    Demonstrate the metrics in this module on a small hand-built
    part-of-speech tagging example: first the list-based metrics
    (confusion matrix, accuracy), then the set-based ones
    (precision, recall, f-measure).
    """
    # Position-by-position comparison of two tag sequences.
    print '-'*75
    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    print 'Reference =', reference
    print 'Test =', test
    print 'Confusion matrix:'
    print ConfusionMatrix(reference, test)
    print 'Accuracy:', accuracy(reference, test)

    # Set-membership comparison of the same data.
    print '-'*75
    reference_set = set(reference)
    test_set = set(test)
    print 'Reference =', reference_set
    print 'Test = ', test_set
    print 'Precision:', precision(reference_set, test_set)
    print ' Recall:', recall(reference_set, test_set)
    print 'F-Measure:', f_measure(reference_set, test_set)
    print '-'*75
458
# Run the demonstration when executed as a script.
if __name__ == '__main__':
    demo()

#: Names exported by ``from <module> import *``.
#: NOTE(review): approxrand is defined above but not listed here --
#: confirm whether that omission is intentional.
__all__ = ['ConfusionMatrix', 'accuracy', 'demo',
           'f_measure', 'log_likelihood', 'precision', 'recall',
           'edit_dist', 'windowdiff']
465