Package nltk :: Package wordnet :: Module cache
[hide private]
[frames | no frames]

Source Code for Module nltk.wordnet.cache

  1  # Natural Language Toolkit: Wordnet Interface: Cache Module 
  2  # 
  3  # Copyright (C) 2001-2008 NLTK Project 
  4  # Author: Oliver Steele <[email protected]> 
  5  #         David Ormiston Smith <[email protected]> 
  6  #         Steven Bird <[email protected]> 
  7  # URL: <http://nltk.org> 
  8  # For license information, see LICENSE.TXT 
  9   
 10  # Some kind of cache is necessary since Sense -> Synset references are 
 11  # stored by key, and it's nice not to have to cons a new copy of a 
 12  # Synset that's been paged in each time a Sense's synset is retrieved. 
 13  # Ideally, we'd use a weak dict, but there aren't any.  A strong dict 
 14  # reintroduces the problem that eliminating the Sense <-> Synset 
 15  # circularity was intended to resolve: every entity ever seen is 
 16  # preserved forever, making operations that iterate over the entire 
 17  # database prohibitive. 
 18  # 
 19  # The LRUCache approximates a weak dict in the case where temporal 
 20  # locality is good. 
 21   
 22  from util import * 
 23   
 24  DEFAULT_CACHE_CAPACITY = 1000 
 25   
26 -class _LRUCache:
27 """ 28 A cache of values such that least recently used element is flushed when 29 the cache fills. 30 31 This lets us retrieve the key given the timestamp, and the 32 timestamp given the key. (Also the value given either one.) 33 That's necessary so that we can reorder the history given a key, 34 and also manipulate the values dict given a timestamp. 35 36 I haven't tried changing history to a List. An earlier 37 implementation of history as a List was slower than what's here, 38 but the two implementations aren't directly comparable. 39 40 @type values: C{dict} 41 @ivar values: A dict from key -> (value, timestamp) 42 43 @type history: C{dict} 44 @ivar history: A dict from timestamp -> key 45 46 @type nextTimestamp: C{int} 47 @ivar nextTimestamp: Timestamp to use with the next value that's added. 48 49 @type oldestTimestamp: C{int} 50 @ivar oldestTimestamp: Timestamp of the oldest element (the next one to 51 remove), or slightly lower than that. 52 """ 53
54 - def __init__(self, capacity):
55 """ 56 Initialize a new cache 57 58 @type capacity: int 59 @param capacity: Size of the cache (number of Sense -> Synset mappings) 60 """ 61 self.capacity = capacity 62 self.clear()
63
64 - def clear(self):
65 """ 66 Flush the cache 67 """ 68 self.values = {} 69 self.history = {} 70 self.oldestTimestamp = 0 71 self.nextTimestamp = 1
72
73 - def removeOldestEntry(self):
74 """ 75 Remove the oldest entry from the cache. 76 """ 77 while self.oldestTimestamp < self.nextTimestamp: 78 79 if self.history.get(self.oldestTimestamp): 80 key = self.history[self.oldestTimestamp] 81 del self.history[self.oldestTimestamp] 82 del self.values[key] 83 return 84 85 self.oldestTimestamp = self.oldestTimestamp + 1
86
87 - def setCapacity(self, capacity):
88 """ 89 Set the capacity of the cache. 90 91 @type capacity: int 92 @param capacity: new size of the cache 93 """ 94 if capacity == 0: self.clear() 95 96 else: 97 self.capacity = capacity 98 99 while len(self.values) > self.capacity: 100 self.removeOldestEntry()
101
102 - def get(self, key, loadfn=None):
103 """ 104 Get an item from the cache. 105 106 @type key: unknown 107 @param key: identifier of a cache entry 108 109 @type loadfn: function reference 110 @param loadfn: a function used to load the cached entry 111 112 @return: a cached item 113 """ 114 value = None 115 116 # Look up the cache 117 if self.values: 118 try: 119 value, timestamp = self.values.get(key) 120 del self.history[timestamp] 121 except KeyError: 122 value = None 123 124 # Load the value if it wasn't cached 125 if value == None: 126 value = loadfn and loadfn() 127 128 # Cache the value we loaded 129 if self.values: 130 timestamp = self.nextTimestamp 131 self.nextTimestamp = self.nextTimestamp + 1 132 self.values[key] = (value, timestamp) 133 self.history[timestamp] = key 134 135 if len(self.values) > self.capacity: 136 self.removeOldestEntry() 137 138 return value
139
140 -class _NullCache:
141 """ 142 A NullCache implements the Cache interface (the interface that 143 LRUCache implements), but doesn't store any values. 144 """ 145
146 - def clear():
147 pass
148
149 - def get(self, key, loadfn=None):
150 return loadfn and loadfn()
151
def disableCache():
    """Disable the entity cache by replacing it with a cache that stores nothing."""
    # Without the global declaration the assignment would only create a
    # local variable and the module-level cache would stay in place.
    global entityCache
    entityCache = _NullCache()
155
def enableCache():
    """
    Enable the entity cache.

    If the module-level cache is not already an LRU cache, it is replaced
    by a new one of the default capacity; an existing LRU cache is kept.
    """
    # The global declaration is needed both to read and to rebind the
    # module-level cache from inside this function.
    global entityCache
    if not isinstance(entityCache, _LRUCache):
        entityCache = _LRUCache(DEFAULT_CACHE_CAPACITY)
160
def clearCache():
    """Flush the entity cache by calling C{clear()} on the module-level cache."""
    entityCache.clear()
164
def setCacheCapacity(capacity=DEFAULT_CACHE_CAPACITY):
    """
    Enable the entity cache, then set its capacity.

    @type capacity: int
    @param capacity: new size of the cache (defaults to
        C{DEFAULT_CACHE_CAPACITY}).
    """
    # enableCache() runs first; setCapacity() is then called on whatever
    # the module-level cache is at that point.
    enableCache()
    entityCache.setCapacity(capacity)
174
def buildIndexFiles():
    """Call C{_buildIndexCacheFile()} on every entry of C{Dictionaries}."""
    for dictionary in Dictionaries:
        dictionary._buildIndexCacheFile()
# Create the default module-level cache: an LRU cache of the default
# capacity.  The cache functions above operate on this name.
entityCache = _LRUCache(DEFAULT_CACHE_CAPACITY)