nltk.sem.evaluate

1 # Natural Language Toolkit: Models for first-order languages with lambda 2 # 3 # Copyright (C) 2001-2008 NLTK Project 4 # Author: Ewan Klein <[email protected]>, 5 # URL: <http://nltk.sourceforge.net> 6 # For license information, see LICENSE.TXT 7 # 8 # $Id: evaluate.py 6355 2008-07-30 23:32:16Z ehk $ 9 10 #TODO: 11 #- fix tracing 12 #- fix iterator-based approach to existentials 13 14 15 """ 16 This module provides data structures for representing first-order 17 models. 18 """ 19 from pprint import pformat 20 import inspect 21 from nltk.decorators import decorator 22 from nltk.internals import deprecated 23 24 from logic import * 25 26

27 -class Error(Exception): pass

28

29 -class Undefined(Error): pass

30

31 -def trace(f, *args, **kw):

32 argspec = inspect.getargspec(f) 33 d = dict(zip(argspec[0], args)) 34 if d.pop('trace', None): 35 print 36 for item in d.items(): 37 print "%s => %s" % item 38 return f(*args, **kw)

39

40 -def is_rel(s):

41 """ 42 Check whether a set represents a relation (of any arity). 43 44 @param s: a set containing C{tuple}s of C{str} elements 45 @type s: C{set} 46 @rtype: C{bool} 47 """ 48 # we have the empty relation, i.e. set() 49 if len(s) == 0: 50 return True 51 # all the elements are tuples of the same length 52 elif len(max(s))==len(min(s)): 53 return True 54 else: 55 raise ValueError, "Set %r contains sequences of different lengths" % s

56

57 -def set2rel(s):

58 """ 59 Convert a set containing individuals (strings or numbers) into a set of 60 unary tuples. Any tuples of strings already in the set are passed through 61 unchanged. 62 63 For example: 64 - set(['a', 'b']) => set([('a',), ('b',)]) 65 - set([3, 27]) => set([('3',), ('27',)]) 66 67 @type s: C{set} 68 @rtype: C{set} of C{tuple} of C{str} 69 """ 70 new = set() 71 for elem in s: 72 if isinstance(elem, str): 73 new.add((elem,)) 74 elif isinstance(elem, int): 75 new.add((str(elem,))) 76 else: 77 new.add(elem) 78 return new

79

80 -def arity(rel):

81 """ 82 Check the arity of a relation. 83 @type rel: C{set} of C{tuple}s 84 @rtype: C{int} of C{tuple} of C{str} 85 """ 86 if len(rel) == 0: 87 return 0 88 return len(list(rel)[0])

89 90 @decorator(trace)

91 -def app(rel, arg, trace=False):

92 """ 93 Apply a relation (as set of tuples) to an argument. 94 95 If C{rel} has arity n <= 1, then C{app} returns a Boolean value. 96 If If C{rel} has arity n > 1, then C{app} returns a relation of arity n-1. 97 98 @type rel: C{set} of C{tuple}s 99 @param arg: any appropriate semantic argument 100 @rtype: C{bool} or a C{set} of C{tuple}s 101 """ 102 assert is_rel(rel) 103 reduced = set([tup[1:] for tup in rel if tup[0] == arg]) 104 if arity(rel) <= 1: 105 return bool(len(reduced)) 106 else: 107 return reduced

108

109 -def make_VariableExpression(var):

110 """ 111 Convert a string into an instance of L{VariableExpression} 112 113 @type var: C{str} 114 @rtype: C{VariableExpression} or C{IndividualVariableExpression} 115 """ 116 117 variable = Variable(var) 118 if is_indvar(var): 119 return IndividualVariableExpression(variable) 120 else: 121 return VariableExpression(variable)

122

123 -class Valuation(dict):

124 """ 125 A dictionary which represents a model-theoretic Valuation of non-logical constants. 126 Keys are strings representing the constants to be interpreted, and values correspond 127 to individuals (represented as strings) and n-ary relations (represented as sets of tuples 128 of strings). 129 130 An instance of L{Valuation} will raise a KeyError exception (i.e., 131 just behave like a standard dictionary) if indexed with an expression that 132 is not in its list of symbols. 133 """

134 - def __init__(self, iter):

135 """ 136 @param iter: a C{list} of (symbol, value) pairs. 137 """ 138 dict.__init__(self) 139 for (sym, val) in iter: 140 if isinstance(val, str) or isinstance(val, bool): 141 self[sym] = val 142 elif isinstance(val, set): 143 self[sym] = set2rel(val)

144

145 - def __getitem__(self, key):

146 if key in self: 147 return dict.__getitem__(self, key) 148 else: 149 raise Undefined, "Unknown expression: '%s'" % key

150 151 #def __str__(self): 152 #return "{%s}" % ', '.join(map(str, self)) 153 154 @deprecated("Call the valuation as an initialization parameter instead")

155 - def read(self, seq):

156 self.update(seq)

157

158 - def __str__(self):

159 return pformat(self)

160

161 - def _getDomain(self):

162 dom = [] 163 for val in self.values(): 164 if isinstance(val, str): 165 dom.append(val) 166 else: 167 dom.extend([elem for tuple in val for elem in tuple if elem is not None]) 168 return set(dom)

169 170 domain = property(_getDomain, 171 doc='Set-theoretic domain of the value-space of a Valuation.') 172

173 - def _getSymbols(self):

174 return sorted(self.keys())

175 176 symbols = property(_getSymbols, 177 doc='The non-logical constants which the Valuation recognizes.')

178 179

180 -class Assignment(dict):

181 """ 182 A dictionary which represents an assignment of values to variables. 183 184 An assigment can only assign values from its domain. 185 186 If an unknown expression M{a} is passed to a model M{M}'s 187 interpretation function M{i}, M{i} will first check whether M{M}'s 188 valuation assigns an interpretation to M{a} as a constant, and if 189 this fails, M{i} will delegate the interpretation of M{a} to 190 M{g}. M{g} only assigns values to individual variables (i.e., 191 members of the class L{IndividualVariableExpression} in the L{logic} 192 module. If a variable is not assigned a value by M{g}, it will raise 193 an C{Undefined} exception. 194 """

195 - def __init__(self, domain, iter=None):

196 """ 197 @param domain: the domain of discourse 198 @type domain: C{set} 199 @param assignment: a map from variable names to values 200 @type domain: C{dict} 201 """ 202 dict.__init__(self) 203 self.domain = domain 204 if iter: 205 for (var, val) in iter: 206 assert val in self.domain,\ 207 "'%s' is not in the domain: %s" % (val, self.domain) 208 assert is_indvar(var),\ 209 "Wrong format for an Individual Variable: '%s'" % var 210 self[var] = val 211 self._addvariant()

212

213 - def __getitem__(self, key):

214 if key in self: 215 return dict.__getitem__(self, key) 216 else: 217 raise Undefined, "Not recognized as a variable: '%s'" % key

218

219 - def copy(self):

220 new = Assignment(self.domain) 221 new.update(self) 222 return new

223

224 - def purge(self, var=None):

225 """ 226 Remove one or all keys (i.e. logic variables) from an 227 assignment, and update C{self.variant}. 228 229 @param var: a Variable acting as a key for the assignment. 230 """ 231 if var: 232 val = self[var] 233 del self[var] 234 else: 235 self.clear() 236 self._addvariant() 237 return None

238

239 - def __str__(self):

240 """ 241 Pretty printing for assignments. {'x', 'u'} appears as 'g[u/x]' 242 """ 243 gstring = "g" 244 for (val, var) in self.variant: 245 gstring = gstring + "[" + str(val) + "/" + str(var) + "]" 246 return gstring

247

248 - def _addvariant(self):

249 """ 250 Create a more pretty-printable version of the assignment. 251 """ 252 list = [] 253 for item in self.items(): 254 pair = (item[1], item[0]) 255 list.append(pair) 256 self.variant = list 257 return None

258

259 - def add(self, var, val):

260 """ 261 Add a new variable-value pair to the assignment, and update 262 C{self.variant}. 263 264 """ 265 assert val in self.domain,\ 266 "%s is not in the domain %s" % (val, self.domain) 267 #assert isinstance(var, IndividualVariableExpression),\ 268 assert is_indvar(var),\ 269 "Wrong format for an Individual Variable: '%s'" % var 270 #self[var.variable.name] = val 271 self[var] = val 272 self._addvariant() 273 return self

274 275

276 -class Model(object):

277 """ 278 A first order model is a domain M{D} of discourse and a valuation M{V}. 279 280 A domain M{D} is a set, and a valuation M{V} is a map that associates 281 expressions with values in the model. 282 The domain of M{V} should be a subset of M{D}. 283 """ 284

285 - def __init__(self, domain, valuation, prop=None):

286 """ 287 Construct a new L{Model}. 288 289 @type domain: C{set} 290 @param domain: A set of entities representing the domain of discourse of the model. 291 @type valuation: L{Valuation} 292 @param valuation: the valuation of the model. 293 @param prop: If this is set, then we are building a propositional\ 294 model and don't require the domain of M{V} to be subset of M{D}. 295 """ 296 assert isinstance(domain, set) 297 self.domain = domain 298 self.valuation = valuation 299 if prop is None: 300 if not domain.issuperset(valuation.domain): 301 raise Error,\ 302 "The valuation domain, %s, must be a subset of the model's domain, %s"\ 303 % (valuation.domain, domain)

304

305 - def __repr__(self):

306 return "(%r, %r)" % (self.domain, self.valuation)

307

308 - def __str__(self):

309 return "Domain = %s,\nValuation = \n%s" % (self.domain, self.valuation)

310 311 # Interpretations for the binary boolean operators 312 ############################# 313

314 - def AND(self, arg1, arg2):

315 return min(arg1, arg2)

316 317

318 - def OR(self, arg1, arg2):

319 return max(arg1, arg2)

320 321

322 - def IMPLIES(self, arg1, arg2):

323 return max(not arg1, arg2)

324 325

326 - def IFF(self, arg1, arg2):

327 return arg1 == arg2

328 329 # EQUALITY 330 ######## 331

332 - def EQ(self, arg1, arg2):

333 return arg1 == arg2

334 335 # Interpretations for the classical quantifiers 336 ######################### 337

338 - def ALL(self, sat):

339 return sat == self.domain

340

341 - def EXISTS(self, sat):

342 for u in sat: 343 return True 344 return False

345 346 OPS = { 347 '&': AND, 348 '|': OR, 349 '->': IMPLIES, 350 '<->': IFF, 351 '=': EQ, 352 } 353

354 - def evaluate(self, expr, g, trace=None):

355 """ 356 Call the L{LogicParser} to parse input expressions, and 357 provide a handler for L{satisfy} 358 that blocks further propagation of the C{Undefined} error. 359 @param expr: An C{Expression} of L{logic}. 360 @type g: L{Assignment} 361 @param g: an assignment to individual variables. 362 @rtype: C{bool} or 'Undefined' 363 """ 364 try: 365 lp = LogicParser() 366 parsed = lp.parse(expr) 367 value = self.satisfy(parsed, g, trace=trace) 368 if trace: 369 print 370 print "'%s' evaluates to %s under M, %s" % (expr, value, g) 371 return value 372 except Undefined: 373 if trace: 374 print 375 print "'%s' is undefined under M, %s" % (expr, g) 376 return 'Undefined'

377 378

379 - def satisfy(self, parsed, g, trace=None):

380 """ 381 Recursive interpretation function for a formula of first-order logic. 382 383 Raises an C{Undefined} error when C{parsed} is an atomic string 384 but is not a symbol or an individual variable. 385 386 @return: Returns a truth value or C{Undefined} if C{parsed} is\ 387 complex, and calls the interpretation function C{i} if C{parsed}\ 388 is atomic. 389 390 @param parsed: An expression of L{logic}. 391 @type g: L{Assignment} 392 @param g: an assignment to individual variables. 393 """ 394 395 OPS = Model.OPS 396 397 if isinstance(parsed, ApplicationExpression): 398 argval = self.satisfy(parsed.argument, g) 399 funval = self.satisfy(parsed.function, g) 400 if isinstance(funval, dict): 401 return funval[argval] 402 else: 403 return app(funval, argval) 404 elif isinstance(parsed, NegatedExpression): 405 return not self.satisfy(parsed.term, g) 406 elif isinstance(parsed, BooleanExpression): 407 op = parsed.getOp() 408 return OPS[op](self, self.satisfy(parsed.first, g), self.satisfy(parsed.second, g)) 409 elif isinstance(parsed, AllExpression): 410 return self.ALL(self.satisfiers(parsed.term, parsed.variable, g)) 411 elif isinstance(parsed, ExistsExpression): 412 satisfiers = self.satisfiers(parsed.term, parsed.variable, g) 413 return self.EXISTS(satisfiers) 414 elif isinstance(parsed, LambdaExpression): 415 cf = {} 416 #varex = self.make_VariableExpression(parsed.variable) 417 var = parsed.variable.name 418 for u in self.domain: 419 val = self.satisfy(parsed.term, g.add(var, u)) 420 # NB the dict would be a lot smaller if we do this: 421 # if val: cf[u] = val 422 # But then need to deal with cases where f(a) should yield 423 # a function rather than just False. 424 cf[u] = val 425 return cf 426 else: 427 return self.i(parsed, g, trace)

428 429 #@decorator(trace_eval)

430 - def i(self, parsed, g, trace=False):

431 """ 432 An interpretation function. 433 434 Assuming that C{parsed} is atomic: 435 436 - if C{parsed} is a non-logical constant, calls the valuation M{V} 437 - else if C{parsed} is an individual variable, calls assignment M{g} 438 - else returns C{Undefined}. 439 440 @param parsed: an C{Expression} of L{logic}. 441 @type g: L{Assignment} 442 @param g: an assignment to individual variables. 443 @return: a semantic value 444 """ 445 # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols 446 # and also be an IndividualVariableExpression. We want to catch this first case. 447 # So there is a procedural consequence to the ordering of clauses here: 448 if parsed.variable.name in self.valuation.symbols: 449 return self.valuation[parsed.variable.name] 450 elif isinstance(parsed, IndividualVariableExpression): 451 return g[parsed.variable.name] 452 453 else: 454 raise Undefined, "Can't find a value for %s" % parsed

455 456 #try: 457 #if trace > 1: 458 #print " i, %s('%s') = %s" % (g, expr, self.valuation[expr]) 459 ## expr is a non-logical constant, i.e., in self.valuation.symbols 460 #return self.valuation[expr] 461 #except Undefined: 462 #if trace > 1: 463 #print " (checking whether '%s' is an individual variable)" % expr 464 465 ## expr wasn't a constant; maybe a variable that g knows about? 466 #return g[expr] 467 468

469 - def satisfiers(self, parsed, varex, g, trace=None, nesting=0):

470 """ 471 Generate the entities from the model's domain that satisfy an open formula. 472 473 @param parsed: an open formula 474 @type parsed: L{Expression} 475 @param varex: the relevant free individual variable in C{parsed}. 476 @type varex: C{VariableExpression} or C{str} 477 @param g: a variable assignment 478 @type g: L{Assignment} 479 @return: a C{set} of the entities that satisfy C{parsed}. 480 """ 481 482 spacer = ' ' 483 indent = spacer + (spacer * nesting) 484 candidates = [] 485 486 if isinstance(varex, str): 487 var = make_VariableExpression(varex).variable 488 else: 489 var = varex 490 491 if var in parsed.free(): 492 if trace: 493 print 494 print (spacer * nesting) + "Open formula is '%s' with assignment %s" % (parsed, g) 495 for u in self.domain: 496 new_g = g.copy() 497 #new_g.add(u, self.make_VariableExpression(var)) 498 new_g.add(var.name, u) 499 if trace > 1: 500 lowtrace = trace-1 501 else: 502 lowtrace = 0 503 value = self.satisfy(parsed, new_g, lowtrace) 504 505 if trace: 506 print indent + "(trying assignment %s)" % new_g 507 508 # parsed == False under g[u/var]? 509 if value == False: 510 if trace: 511 print indent + "value of '%s' under %s is False" % (parsed, new_g) 512 513 514 # so g[u/var] is a satisfying assignment 515 else: 516 candidates.append(u) 517 if trace: 518 print indent + "value of '%s' under %s is %s" % (parsed, new_g, value) 519 520 result = set(c for c in candidates) 521 # var isn't free in parsed 522 else: 523 raise Undefined, "%s is not free in %s" % (var.name, parsed) 524 525 return result

526 527 528 529 530 531 #////////////////////////////////////////////////////////////////////// 532 # Demo.. 533 #////////////////////////////////////////////////////////////////////// 534 # number of spacer chars 535 mult = 30 536 537 # Demo 1: Propositional Logic 538 #################

539 -def propdemo(trace=None):

540 """Example of a propositional model.""" 541 542 global val1, dom1, m1, g1 543 val1 = Valuation([('p', True), ('q', True), ('r', False)]) 544 dom1 = set([]) 545 m1 = Model(dom1, val1, prop=True) 546 g1 = Assignment(dom1) 547 548 print 549 print '*' * mult 550 print "Propositional Formulas Demo" 551 print '*' * mult 552 print "Model m1:\n", m1 553 print '*' * mult 554 sentences = [ 555 '(p & q)', 556 '(p & r)', 557 '- p', 558 '- r', 559 '- - p', 560 '- (p & r)', 561 '(p | r)', 562 '(r | p)', 563 '(r | r)', 564 '(- p | r)', 565 '(p | - p)', 566 '(p -> q)', 567 '(p -> r)', 568 '(r -> p)', 569 '(p <-> p)', 570 '(r <-> r)', 571 '(p <-> r)', 572 ] 573 574 for sent in sentences: 575 if trace: 576 print 577 m1.evaluate(sent, g1, trace) 578 else: 579 print "The value of '%s' is: %s" % (sent, m1.evaluate(sent, g1))

580 581 # Demo 2: FOL Model 582 ############# 583

584 -def folmodel(trace=None):

585 """Example of a first-order model.""" 586 587 global val2, v2, dom2, m2, g2 588 589 v2 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\ 590 ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])), 591 ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))] 592 val2 = Valuation(v2) 593 dom2 = val2.domain 594 m2 = Model(dom2, val2) 595 g2 = Assignment(dom2, [('x', 'b1'), ('y', 'g2')]) 596 597 if trace: 598 print "*" * mult 599 print "Model m2\n", m2 600 print "*" * mult 601 print g2 602 g2.purge() 603 print g2 604 605 exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z'] 606 lp = LogicParser() 607 parsed_exprs = [lp.parse(e) for e in exprs] 608 609 if trace: 610 for parsed in parsed_exprs: 611 try: 612 print "The interpretation of '%s' in m2 is %s" % (parsed, m2.i(parsed, g2)) 613 except Undefined: 614 print "The interpretation of '%s' in m2 is Undefined" % parsed

615 616 # Demo 3: FOL 617 ######### 618

619 -def foldemo(trace=None):

620 """ 621 Interpretation of closed expressions in a first-order model. 622 """ 623 folmodel() 624 625 print 626 print '*' * mult 627 print "FOL Formulas Demo" 628 print '*' * mult 629 630 formulas = [ 631 'love (adam, betty)', 632 '(adam = mia)', 633 '\\x. (boy(x) | girl(x))', 634 '\\x. boy(x)(adam)', 635 '\\x y. love(x, y)', 636 '\\x y. love(x, y)(adam)(betty)', 637 '\\x y. love(x, y)(adam, betty)', 638 '\\x y. (boy(x) & love(x, y))', 639 '\\x. exists y. (boy(x) & love(x, y))', 640 'exists z1. boy(z1)', 641 'exists x. (boy(x) & -(x = adam))', 642 'exists x. (boy(x) & all y. love(y, x))', 643 'all x. (boy(x) | girl(x))', 644 'all x. (girl(x) -> exists y. boy(y) & love(x, y))', #Every girl loves exists boy. 645 'exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', #There is exists boy that every girl loves. 646 'exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', #exists boy loves every girl. 647 'all x. (dog(x) -> - girl(x))', 648 'exists x. exists y. (love(x, y) & love(x, y))' 649 ] 650 651 652 for fmla in formulas: 653 g2.purge() 654 if trace: 655 m2.evaluate(fmla, g2, trace) 656 else: 657 print "The value of '%s' is: %s" % (fmla, m2.evaluate(fmla, g2))

658 659 660 # Demo 3: Satisfaction 661 ############# 662

663 -def satdemo(trace=None):

664 """Satisfiers of an open formula in a first order model.""" 665 666 print 667 print '*' * mult 668 print "Satisfiers Demo" 669 print '*' * mult 670 671 folmodel() 672 673 formulas = [ 674 'boy(x)', 675 '(x = x)', 676 '(boy(x) | girl(x))', 677 '(boy(x) & girl(x))', 678 'love(adam, x)', 679 'love(x, adam)', 680 '-(x = adam)', 681 'exists z22. love(x, z22)', 682 'exists y. love(y, x)', 683 'all y. (girl(y) -> love(x, y))', 684 'all y. (girl(y) -> love(y, x))', 685 'all y. (girl(y) -> (boy(x) & love(y, x)))', 686 '(boy(x) & all y. (girl(y) -> love(x, y)))', 687 '(boy(x) & all y. (girl(y) -> love(y, x)))', 688 '(boy(x) & exists y. (girl(y) & love(y, x)))', 689 '(girl(x) -> dog(x))', 690 'all y. (dog(y) -> (x = y))', 691 'exists y. love(y, x)', 692 'exists y. (love(adam, y) & love(y, x))' 693 ] 694 695 if trace: 696 print m2 697 698 lp = LogicParser() 699 for fmla in formulas: 700 print fmla 701 lp.parse(fmla) 702 703 parsed = [lp.parse(fmla) for fmla in formulas] 704 705 for p in parsed: 706 g2.purge() 707 print "The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, 'x', g2, trace))

708 709

710 -def demo(num=0, trace=None):

711 """ 712 Run exists demos. 713 714 - num = 1: propositional logic demo 715 - num = 2: first order model demo (only if trace is set) 716 - num = 3: first order sentences demo 717 - num = 4: satisfaction of open formulas demo 718 - any other value: run all the demos 719 720 @param trace: trace = 1, or trace = 2 for more verbose tracing 721 """ 722 demos = { 723 1: propdemo, 724 2: folmodel, 725 3: foldemo, 726 4: satdemo} 727 728 try: 729 demos[num](trace=trace) 730 except KeyError: 731 for num in demos: 732 demos[num](trace=trace)

733 734 735 if __name__ == "__main__": 736 demo(4, trace=1) 737

Source Code for Module nltk.sem.evaluate