Source Code for Module nltk.wordnet.synset

# Natural Language Toolkit: Wordnet Interface: Wordnet Module
#
# Copyright (C) 2001-2008 NLTK Project
# Author: Oliver Steele <[email protected]>
#         David Ormiston Smith <[email protected]>
#         Steven Bird <[email protected]>
# URL: <http://nltk.org>
# For license information, see LICENSE.TXT

import math
import pickle
import string
import re

from nltk import defaultdict

from util import *
import dictionary
import similarity
from frequency import *
from lexname import Lexname

class Word(object):
    def __init__(self, line):
        """
        Extract a word from a line of a WordNet POS file.
        @type  line: C{string}
        @param line: The appropriate line taken from the Wordnet data files.
        """

        tokens = line.split()
        ints = map(int, tokens[int(tokens[3]) + 4:])

        self.form = tokens[0].replace('_', ' ')   # orthography
        self.pos = normalizePOS(tokens[1])        # NOUN, VERB, ADJECTIVE, ADVERB
        self.taggedSenseCount = ints[1]           # Number of senses tagged
        self._synsetOffsets = ints[2:ints[0]+2]   # Offsets of this word's synsets

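    # Schematic layout of the index.* line that the slicing above assumes
    # (WNDB index file format; shown here only as an illustrative reference):
    #   lemma pos synset_cnt p_cnt [ptr_symbol...] sense_cnt tagsense_cnt synset_offset [synset_offset...]
    # so ints[0] is sense_cnt, ints[1] is tagsense_cnt, and the remaining
    # integers are this word's synset offsets.
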
    def synsets(self):
        """
        Get a sequence of the L{Synset}s of this word.

        >>> from nltk.wordnet import *
        >>> N['dog'].synsets()
        [{noun: dog, domestic dog, Canis familiaris}, {noun: frump, dog}, {noun: dog}, {noun: cad, bounder, blackguard, dog, hound, heel}, {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, {noun: pawl, detent, click, dog}, {noun: andiron, firedog, dog, dog-iron}]

        @return: A list of this L{Word}'s L{Synset}s
        """

        try:
            return self._synsets
        except AttributeError:
            self._synsets = [dictionary.synset(self.pos, offset)
                             for offset in self._synsetOffsets]
            del self._synsetOffsets
            return self._synsets

    def senses(self):
        """
        Return a list of WordSense objects corresponding to this word's L{Synset}s.
        """
        return [s.wordSense(self.form) for s in self.synsets()]

    def senseCounts(self):
        """
        Return the frequencies of each sense of this word in a tagged concordance.
        """
        return [s.count() for s in self.senses()]

    def isTagged(self):
        """
        >>> from nltk.wordnet import *
        >>> N['dog'].isTagged()
        True

        @return: True/false (1/0) if one of this L{Word}'s senses is tagged.
        """
        return self.taggedSenseCount > 0

    # Broken
    # def getAdjectivePositions(self):
    #     """
    #     >>> from nltk.wordnet import *
    #     >>> ADJ['clear'].getAdjectivePositions()
    #     [None, 'predicative']
    #
    #     @return: Return a list of adjective positions that this word can
    #     appear in. These are elements of ADJECTIVE_POSITIONS.
    #     """
    #
    #     return list(set(synset.position for synset in self))

    def __getitem__(self, idx):
        return self.synsets()[idx]

    def __iter__(self):
        return iter(self.synsets())

    def __contains__(self, item):
        return item in self.synsets()

    def __getslice__(self, i, j):
        return self.synsets()[i:j]

    def __len__(self):
        return len(self.synsets())

    def __repr__(self):
#        return "<Word:" + self.form + '/' + self.pos + ">"
        return self.__str__()

    def __str__(self):
        return self.form + ' (' + self.pos + ")"

    def __cmp__(self, other):
        return _compareInstances(self, other, ('form', 'pos'))

    def __hash__(self):
        return hash((self.form, self.pos))


class WordSense(object):
    """
    A single word-sense pairing, indicated in WordNet by a sense key of
    the form::
        lemma%ss_type:lex_filenum:lex_id:head_word:head_id
    """

    _ssTypeMap = {'n': 1, 'v': 2, 'a': 3, 'r': 4, 's': 5}
    _ssTypeRevMap = dict((v,k) for k,v in _ssTypeMap.iteritems())

    def __init__(self, senseKey):
        self.senseKey = senseKey
        self.lemma, remainder = senseKey.split('%', 1)
        (ssType, lexFilenum, lexId,
         self.headWord, headId) = remainder.split(':')

        self.ssType = self._ssTypeRevMap[int(ssType)]
        self.lexFilenum = int(lexFilenum)
        self.lexId = int(lexId)
        try:
            self.headId = int(headId)
        except ValueError:
            self.headId = None

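    # Illustrative example: a sense key such as 'dog%1:05:00::' parses as
    # lemma 'dog', ss_type 1 (noun), lex_filenum 5, lex_id 0, with empty
    # head_word and head_id fields.
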
    def count(self):
        return senseCount(self.senseKey)

    def _senseIndexLine(self):
        try:
            WordSense._index
        except AttributeError:
            path = nltk.data.find('corpora/wordnet/index.sense')
            WordSense._index = open(path, FILE_OPEN_MODE)

        res = binarySearchFile(WordSense._index, self.senseKey)
        if res:
            return res
        raise ValueError("Could not find data for sense '%s'. "
                         "Is the key wrong?" % self.senseKey)

    def synset(self):
        line = self._senseIndexLine()
        return dictionary.synset(self.ssType, int(line.split()[1]))

    def word(self):
        return dictionary.word(self.lemma, self.ssType)

    def senseNo(self):
        line = self._senseIndexLine()
        return int(line.split()[2])

    def lexname(self):
        return Lexname.lexnames[self.lexFilenum]

    def __str__(self):
        return ('%s (%s) %d'
                % (self.lemma, normalizePOS(self.ssType), self.senseNo()))

    def __cmp__(self, other):
        return _compareInstances(self, other, ('senseKey',))

    def __hash__(self):
        return hash(self.senseKey)

    __repr__ = __str__

    @staticmethod
    def fromSynset(synset, lemma, lex_id):
        ss_type = WordSense._ssTypeMap[synset.ssType]
        lex_filenum = synset.lexname.id
        head_word = ''
        head_id = ''
        if synset.ssType == 's':
            # Satellite adjectives are treated specially
            head_word = synset.headSynset.words[0]
            head_id = synset.headSynset.wordSenses[0].lexId

        return WordSense.fromKeyParams(
            lemma.lower(), ss_type, lex_filenum, lex_id, head_word, head_id)

    @staticmethod
    def fromKeyParams(lemma, ss_type, lex_filenum, lex_id,
                      head_word='', head_id=''):

        if head_word:
            head_id = '%02d' % head_id

        return WordSense('%s%%%d:%02d:%02d:%s:%s'
                         % (lemma, ss_type, lex_filenum, lex_id, head_word, head_id))
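
    # Illustrative example: WordSense.fromKeyParams('dog', 1, 5, 0) builds the
    # sense key 'dog%1:05:00::' (head_word and head_id stay empty for
    # non-satellite senses).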


class Synset(object):
    """
    A set of synonyms.

    Each synset contains one or more Senses, which represent a
    specific sense of a specific word.  Senses can be retrieved via
    synset.senses() or through the index notations synset[0],
    synset[string], or synset[word].  Synsets participate in
    lexical relations, which can be accessed via synset.relations().

    >>> from nltk.wordnet import *
    >>> N['dog'][0]
    {noun: dog, domestic_dog, Canis_familiaris}
    >>> N['dog'][0][HYPERNYM]
    [{noun: canine, canid}, {noun: domestic_animal, domesticated_animal}]
    >>> V['think'][0].verbFrameStrings
    ['Something think something Adjective/Noun', 'Somebody think somebody']

    @type  pos: C{string}
    @ivar  pos: The part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB.

    @type  offset: C{int}
    @ivar  offset: An integer offset into the part-of-speech file. Together
        with pos, this can be used as a unique id.

    @type  gloss: C{string}
    @ivar  gloss: A gloss (dictionary definition) for the sense.

    @type  verbFrames: C{list} of C{integer}
    @ivar  verbFrames: A sequence of integers that index into
        VERB_FRAME_STRINGS. These list the verb frames that any
        Sense in this synset participates in. (See also
        Sense.verbFrames.) Defined only for verbs.
    """

    def __init__(self, pos, offset, line):
        """Initialize the synset from a line in a WordNet lexicographer file."""

        # Part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB.
        self.pos = pos

        # Integer offset into the part-of-speech file. Together with pos,
        # this can be used as a unique id.
        self.offset = offset

        # The synset entry can be broadly divided into two parts: the
        # synset and relational data, and its human readable description, or
        # gloss. The '|' character separates these.

        dividerIndex = line.index('|')
        tokens = line[:dividerIndex].split()
        self.ssType = tokens[2]
        self.gloss = line[dividerIndex + 1:].strip()
        self.lexname = Lexname.lexnames[int(tokens[1])]

        # TODO: This next code is dense and confusing. Clean up at some point.
        # line is of the form:
        #   synset_offset lex_filenum ss_type w_cnt word lex_id [word lex_id...] p_cnt [ptr...] [frames...] | gloss

        synset_cnt = int(tokens[3], 16)  # hex integer giving the number of words in the synset; same as w_cnt above

        # extract all pairs of the form (sense, lex_id), plus a remainder
        (senseTuples, remainder1) = _partition(tokens[4:], 2, synset_cnt)
        self.words = [form for form, lex_id in senseTuples]

        # extract all pointer quadruples, plus a remainder
        (self._pointerTuples, remainder2) = _partition(remainder1[1:], 4, int(remainder1[0]))

        # Find word senses (via sense keys) from lemma and lex_id
        if self.ssType == 's':
            # need head synset available for finding sense_keys
            self.headSynset = self.relation('similar')[0]
        self.wordSenses = [WordSense.fromSynset(self, form, int(lex_id, 16))
                           for form, lex_id in senseTuples]

        # frames: In data.verb only, a list of numbers corresponding to the
        # generic verb sentence frames for words in the synset. frames is of
        # the form:
        #   f_cnt + f_num w_num [ + f_num w_num...]
        # where f_cnt is a two digit decimal integer indicating the number of
        # generic frames listed, f_num is a two digit decimal integer frame
        # number, and w_num is a two digit hexadecimal integer indicating the
        # word in the synset that the frame applies to. As with pointers, if
        # this number is 00, f_num applies to all words in the synset. If
        # non-zero, it is applicable only to the word indicated. Word numbers
        # are assigned as described for pointers.

        if pos == VERB:
            (vfTuples, remainder3) = _partition(remainder2[1:], 3, int(remainder2[0]))

            # now only used for senseVerbFrames
            def extractVerbFrames(index, vfTuples):
                return tuple(map(lambda t: int(t[1]),
                                 filter(lambda t, i=index: int(t[2], 16) in (0, i),
                                        vfTuples)))

            senseVerbFrames = []
            for index in range(1, len(self.words) + 1):
                senseVerbFrames.append(extractVerbFrames(index, vfTuples))
            self._senseVerbFrames = senseVerbFrames

            # A sequence of integers that index into VERB_FRAME_STRINGS. These
            # list the verb frames that any Sense in this synset participates
            # in (see also Sense.verbFrames). Defined only for verbs.

            self.verbFrames = tuple(extractVerbFrames(None, vfTuples))

            # A list of verb frame strings for this synset
            self.verbFrameStrings = self.extractVerbFrameStrings(vfTuples)

    def wordSense(self, word):
        """
        Return the WordSense object for the given word in this synset.
        """
        word = word.replace(' ', '_')
        try:
            index = self.words.index(word)
        except ValueError:
            try:
                # Try for proper noun
                index = self.words.index(word.title())
            except ValueError:
                raise ValueError(
                    "Could not find word '%s' for this synset." % word)

        return self.wordSenses[index]

    def extractVerbFrameStrings(self, vfTuples):
        """
        Return a list of verb frame strings for this synset.
        """
        # extract a frame index if 3rd item is 00
        frame_indices = [int(t[1]) for t in vfTuples if int(t[2], 16) == 0]
        try:
            verbFrames = [VERB_FRAME_STRINGS[i] for i in frame_indices]
        except IndexError:
            return []
        # ideally we should build 3rd person morphology for this form
        form = self[0]
        verbFrameStrings = [vf % form for vf in verbFrames]
        return verbFrameStrings

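    # Illustrative example (cf. the verbFrameStrings doctest in the class
    # docstring): for V['think'][0], substituting the synset's first word into
    # the frame template 'Somebody %s somebody' yields 'Somebody think somebody'.
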
    def relations(self):
        """
        Return a dictionary of synsets, one per lexical relation

        @return: relations defined on this L{Synset}.
        """

        # Load the pointers from the Wordnet files if necessary.
        if not hasattr(self, '_relations'):
            relations = defaultdict(list)

            for (type, offset, pos, indices) in self._pointerTuples:
                rel = _RELATION_TABLE[type]
                idx = int(indices, 16) & 255
                pos = normalizePOS(pos)
                offset = int(offset)

                synset = dictionary.synset(pos, offset)
                if idx:
                    relations[rel].append(synset[idx-1])
                else:
                    relations[rel].append(synset)
            del self._pointerTuples
            self._relations = dict(relations)

        return self._relations

    def relation(self, rel):
        return self.relations().get(rel, [])

    ### BROKEN:
    def isTagged(self):
        """
        >>> from nltk.wordnet import *
        >>> N['dog'][0].isTagged()
        True

        >>> N['dog'][1].isTagged()
        False

        @return: True/false (1/0) if one of this L{Word}'s senses is tagged.
        """
        # Note: self.words holds plain strings rather than Word objects, so the
        # call below does not work as written (hence the BROKEN marker above).
        return len(filter(Word.isTagged, self.words)) > 0

    def __str__(self):
        """
        Return a human-readable representation.

        >>> from nltk.wordnet import *
        >>> str(N['dog'][0].synset)
        '{noun: dog, domestic dog, Canis familiaris}'
        """
        return "{" + self.pos + ": " + string.join(self.words, ", ") + "}"

    def __repr__(self):
        return "{" + self.pos + ": " + string.join(self.words, ", ") + "}"

    def __cmp__(self, other):
        return _compareInstances(self, other, ('pos', 'offset'))

    def __hash__(self):
        return hash((self.pos, self.offset))

    def __ne__(self, other):
        return not (self == other)

    def __getitem__(self, idx):
        try:
            return self.words[idx]        # integer key
        except TypeError:
            return self.relation(idx)     # string key

    def __iter__(self):
        return iter(self.words)

    def __contains__(self, item):
        return item in self.words

    def __getslice__(self, i, j):
        return self.words[i:j]

    def __nonzero__(self):
        return 1

    def __len__(self):
        """
        >>> from nltk.wordnet import *
        >>> len(N['dog'][0].synset)
        3
        """
        return len(self.words)

    def max_depth(self):
        """
        @return: The length of the longest hypernym path from this synset to the root.
        """

        if self[HYPERNYM] == []:
            return 0

        deepest = 0
        for hypernym in self[HYPERNYM]:
            depth = hypernym.max_depth()
            if depth > deepest:
                deepest = depth
        return deepest + 1

    def min_depth(self):
        """
        @return: The length of the shortest hypernym path from this synset to the root.
        """

        if self[HYPERNYM] == []:
            return 0

        shallowest = 1000
        for hypernym in self[HYPERNYM]:
            # Recurse with min_depth (not max_depth): we want the shortest path.
            depth = hypernym.min_depth()
            if depth < shallowest:
                shallowest = depth
        return shallowest + 1

    def closure(self, rel, depth=-1):
        """Return the transitive closure of source under the rel relationship, breadth-first

        >>> dog = N['dog'][0]
        >>> dog.closure(HYPERNYM)
        [{noun: dog, domestic dog, Canis familiaris}, {noun: canine, canid}, {noun: carnivore}, {noun: placental, placental mammal, eutherian, eutherian mammal}, {noun: mammal, mammalian}, {noun: vertebrate, craniate}, {noun: chordate}, {noun: animal, animate being, beast, brute, creature, fauna}, {noun: organism, being}, {noun: living thing, animate thing}, {noun: object, physical object}, {noun: physical entity}, {noun: entity}]
        """
        from nltk.util import breadth_first
        synset_offsets = []
        for synset in breadth_first(self, lambda s: s[rel], depth):
            if synset.offset != self.offset and synset.offset not in synset_offsets:
                synset_offsets.append(synset.offset)
                yield synset
        # return synsets

    def hypernym_paths(self):
        """
        Get the path(s) from this synset to the root, where each path is a
        list of the synset nodes traversed on the way to the root.

        @return: A list of lists, where each list gives the node sequence
            connecting the initial L{Synset} node and a root node.
        """
        paths = []

        hypernyms = self[HYPERNYM]
        if len(hypernyms) == 0:
            paths = [[self]]

        for hypernym in hypernyms:
            for ancestor_list in hypernym.hypernym_paths():
                ancestor_list.append(self)
                paths.append(ancestor_list)
        return paths

    def hypernym_distances(self, distance, verbose=False):
        """
        Get the path(s) from this synset to the root, counting the distance
        of each node from the initial node on the way. A set of
        (synset, distance) tuples is returned.

        @type  distance: C{int}
        @param distance: the distance (number of edges) from this hypernym to
            the original hypernym L{Synset} on which this method was called.
        @return: A set of (L{Synset}, int) tuples where each L{Synset} is
            a hypernym of the first L{Synset}.
        """
        distances = set([(self, distance)])

        for hypernym in self[HYPERNYM]:
            distances |= hypernym.hypernym_distances(distance+1, verbose=False)
        if verbose:
            print "> Hypernym Distances:", self, string.join(synset.__str__() + ":" + `dist` for synset, dist in distances)
        return distances

    def shortest_path_distance(self, other):
        """
        Returns the distance of the shortest path linking the two synsets (if
        one exists). For each synset, all the ancestor nodes and their distances
        are recorded and compared. The ancestor node common to both synsets that
        can be reached with the minimum number of traversals is used. If no
        ancestor nodes are common, -1 is returned. If a node is compared with
        itself 0 is returned.

        @type  other: L{Synset}
        @param other: The Synset to which the shortest path will be found.
        @return: The number of edges in the shortest path connecting the two
            nodes, or -1 if no path exists.
        """

        if self == other: return 0

        path_distance = -1

        dist_list1 = self.hypernym_distances(0)
        dist_dict1 = {}

        dist_list2 = other.hypernym_distances(0)
        dist_dict2 = {}

        # Transform each distance list into a dictionary. In cases where
        # there are duplicate nodes in the list (due to there being multiple
        # paths to the root) the duplicate with the shortest distance from
        # the original node is entered.

        for (l, d) in [(dist_list1, dist_dict1), (dist_list2, dist_dict2)]:
            for (key, value) in l:
                if key in d:
                    if value < d[key]:
                        d[key] = value
                else:
                    d[key] = value

        # For each ancestor synset common to both subject synsets, find the
        # connecting path length. Return the shortest of these.

        for synset1 in dist_dict1.keys():
            for synset2 in dist_dict2.keys():
                if synset1 == synset2:
                    new_distance = dist_dict1[synset1] + dist_dict2[synset2]
                    if path_distance < 0 or new_distance < path_distance:
                        path_distance = new_distance

        return path_distance

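    # Illustrative example (assuming a standard WordNet 3.0 noun hierarchy):
    # N['dog'][0] and N['cat'][0] first meet at {noun: carnivore}, two hypernym
    # edges above each, so their shortest_path_distance would be 4.
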
    def tree(self, rel, depth=-1, cut_mark=None):
        """
        >>> dog = N['dog'][0]
        >>> from pprint import pprint
        >>> pprint(dog.tree(HYPERNYM))
        ['dog' in {noun: dog, domestic dog, Canis familiaris},
         [{noun: canine, canid},
          [{noun: carnivore},
           [{noun: placental, placental mammal, eutherian, eutherian mammal},
            [{noun: mammal, mammalian},
             [{noun: vertebrate, craniate},
              [{noun: chordate},
               [{noun: animal, animate being, beast, brute, creature, fauna},
                [{noun: organism, being},
                 [{noun: living thing, animate thing},
                  [{noun: object, physical object},
                   [{noun: physical entity}, [{noun: entity}]]]]]]]]]]]]]
        """

        tree = [self]
        if depth != 0:
            tree += [x.tree(rel, depth-1, cut_mark) for x in self[rel]]
        elif cut_mark:
            tree += [cut_mark]
        return tree

    # interface to similarity methods

    def path_similarity(self, other, verbose=False):
        return similarity.path_similarity(self, other, verbose)

    def lch_similarity(self, other, verbose=False):
        return similarity.lch_similarity(self, other, verbose)

    def wup_similarity(self, other, verbose=False):
        return similarity.wup_similarity(self, other, verbose)

    def res_similarity(self, other, ic, verbose=False):
        return similarity.res_similarity(self, other, ic, verbose)

    def jcn_similarity(self, other, ic, verbose=False):
        return similarity.jcn_similarity(self, other, ic, verbose)

    def lin_similarity(self, other, ic, verbose=False):
        return similarity.lin_similarity(self, other, ic, verbose)


# Lexical Relations

_RELATION_TABLE = {
    '!': ANTONYM,            '@': HYPERNYM,           '~': HYPONYM,        '=': ATTRIBUTE,
    '^': ALSO_SEE,           '*': ENTAILMENT,         '>': CAUSE,          '$': VERB_GROUP,
    '#m': MEMBER_MERONYM,    '#s': SUBSTANCE_MERONYM, '#p': PART_MERONYM,
    '%m': MEMBER_HOLONYM,    '%s': SUBSTANCE_HOLONYM, '%p': PART_HOLONYM,
    '&': SIMILAR,            '<': PARTICIPLE_OF,      '\\': PERTAINYM,     '+': FRAMES,
    ';c': CLASSIF_CATEGORY,  ';u': CLASSIF_USAGE,     ';r': CLASSIF_REGIONAL,
    '-c': CLASS_CATEGORY,    '-u': CLASS_USAGE,       '-r': CLASS_REGIONAL,
    '@i': INSTANCE_HYPERNYM, '~i': INSTANCE_HYPONYM,
    }

# Private Utility Functions

def _index(key, sequence, testfn=None, keyfn=None):
    """
    Return the index of key within sequence, using testfn for
    comparison and transforming items of sequence by keyfn first.

    >>> _index('e', 'hello')
    1
    >>> _index('E', 'hello', testfn=_equalsIgnoreCase)
    1
    >>> _index('x', 'hello')
    """
    index = 0
    for element in sequence:
        value = element
        if keyfn:
            value = keyfn(value)
        if (not testfn and value == key) or (testfn and testfn(value, key)):
            return index
        index = index + 1
    return None

def _partition(sequence, size, count):
    """
    Partition sequence into C{count} subsequences of
    length C{size}, and a remainder.

    Return C{(partitions, remainder)}, where C{partitions} is a sequence of
    C{count} subsequences of cardinality C{size}, and
    C{apply(append, partitions) + remainder == sequence}.
    """

    partitions = []
    for index in range(0, size * count, size):
        partitions.append(sequence[index:index + size])
    return (partitions, sequence[size * count:])

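# Illustrative usage: _partition([1, 2, 3, 4, 5], 2, 2) returns
# ([[1, 2], [3, 4]], [5]).
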
def _compareInstances(a, b, fields):
    """
    Return -1, 0, or 1 according to a comparison first by type,
    then by class, and finally by each of fields. Used when comparing two
    Wordnet objects (Synsets, Words, or Senses) to each other.
    """
    if not hasattr(b, '__class__'):
        return cmp(type(a), type(b))
    elif a.__class__ != b.__class__:
        return cmp(a.__class__, b.__class__)

    for field in fields:
        diff = cmp(getattr(a, field), getattr(b, field))
        if diff: return diff

    return 0

def _equalsIgnoreCase(a, b):
    """
    Return true iff a and b have the same lowercase representation.

    >>> _equalsIgnoreCase('dog', 'Dog')
    True
    >>> _equalsIgnoreCase('dOg', 'DOG')
    True
    """
    return a == b or a.lower() == b.lower()


def demo():
    from nltk import wordnet
    from pprint import pprint

    dog = wordnet.N['dog']
    cat = wordnet.N['cat']

    print "wordnet.N['dog']"
    print 'dog' in wordnet.N
    print dog
    print dog.pos, dog.form
    print dog.taggedSenseCount
    print dog.synsets()
    print dog.isTagged()
    # ADJ['clear'].getAdjectivePositions()
    # N['cat'] < N['dog']
    # N['dog'] < V['dog']

    print "Verb Frames:",
    print wordnet.V['think'][0].verbFrameStrings

    print "Relations:"
    print dog[0].relations()
    print dog[0][wordnet.HYPERNYM]

    print "Glosses:"
    print dog[0].gloss
    print dog[0].relation(wordnet.HYPERNYM)[0].gloss

    print
    print "Paths and Distances:"
    print

    print dog[0].hypernym_paths()
    print dog[0].hypernym_distances(0)
    print dog[0].shortest_path_distance(cat[0])

    print
    print "Closures and Trees:"
    print

    pprint(wordnet.ADJ['red'][0].closure(wordnet.SIMILAR, depth=1))
    pprint(wordnet.ADJ['red'][0].closure(wordnet.SIMILAR, depth=2))
    pprint(dog[0].tree(wordnet.HYPERNYM))
    pprint(dog[0].tree(wordnet.HYPERNYM, depth=2, cut_mark='...'))

    entity = wordnet.N["entity"]
    print entity, entity[0]
    print entity[0][wordnet.HYPONYM]
    pprint(entity[0].tree(wordnet.HYPONYM, depth=1), indent=4)
    abstract_entity = wordnet.N["abstract entity"]
    print abstract_entity, abstract_entity[0]
    print abstract_entity[0][wordnet.HYPONYM]
    pprint(abstract_entity[0].tree(wordnet.HYPONYM, depth=1), indent=4)

    # Adjectives that are transitively SIMILAR to any of the senses of 'red'
    #flatten1(map(lambda sense:closure(sense, SIMILAR), ADJ['red'])) # too verbose

    print "All the words in the hyponym synsets of dog[0]"
    print [word for synset in dog[0][wordnet.HYPONYM] for word in synset]

    print "Hyponyms of the first (and only) sense of 'animal' that are homophonous with verbs:"
    print [word for synset in wordnet.N['animal'][0].closure(wordnet.HYPONYM) for word in synset if word in wordnet.V]

    # BROKEN
    print "Senses of 'raise'(v.) and 'lower'(v.) that are antonyms:"
    print filter(lambda p: p[0] in p[1][wordnet.ANTONYM], [(r, l) for r in wordnet.V['raise'] for l in wordnet.V['lower']])

    print
    print "Similarity: dog~cat"
    print

    print "Path Distance Similarity:",
    print dog[0].path_similarity(cat[0])
    print "Leacock Chodorow Similarity:",
    print dog[0].lch_similarity(cat[0])
    print "Wu Palmer Similarity:",
    print dog[0].wup_similarity(cat[0])

    # set up the data file
    # print "Resnik Similarity:",
    # print dog[0].resnik_similarity(cat[0], datafile)
    # print "Jiang-Conrath Similarity:",
    # print dog[0].jiang_conrath_similarity(cat[0], datafile)
    # print "Lin Similarity:",
    # print dog[0].lin_similarity(cat[0], datafile)

if __name__ == '__main__':
    demo()