GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hopscotch_map.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  *
4  * All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an "AS
14  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
15  * express or implied. See the License for the specific language
16  * governing permissions and limitations under the License.
17  *
18  * For more about this software visit:
19  *
20  * http://www.graphlab.ml.cmu.edu
21  *
22  */
23 
24 #ifndef GRAPHLAB_UTIL_HOPSCOTCH_HASH_HPP
25 #define GRAPHLAB_UTIL_HOPSCOTCH_HASH_HPP
26 
27 #include <graphlab/util/hopscotch_table.hpp>
28 #include <graphlab/parallel/pthread_tools.hpp>
29 #include <graphlab/parallel/atomic.hpp>
30 
31 #include <graphlab/serialization/serialization_includes.hpp>
32 
33 
34 #include <boost/functional/hash.hpp>
35 #define _HOPSCOTCH_MAP_DEFAULT_HASH boost::hash<Key>
36 
37 
38 
39 namespace graphlab {
40 
41 
42 
43  /**
44  * A hopscotch hash map. More or less similar
45  * interface as boost::unordered_map, not necessarily
46  * entirely STL compliant.
47  * Really should only be used to store small keys and trivial values.
48  *
49  * \tparam Key The key of the map
50  * \tparam Value The value to store for each key
51  * \tparam Synchronized Defaults to True. If True, locking is used to ensure
52  * safe reads and writes to the hash table.
53  * Even under "Synchronized", the only operations
54  * which are safe for parallel access are all functions
55  * suffixed with "sync".
56  * \tparam Hash The hash functor type. Defaults to std::hash<Key> if C++11 is
57  * available. Otherwise defaults to boost::hash<Key>
58  * \tparam KeyEqual The functor used to identify object equality. Defaults to
59  * std::equal_to<Key>
60  */
61  template <typename Key,
62  typename Value,
63  bool Synchronized = true,
64  typename Hash = _HOPSCOTCH_MAP_DEFAULT_HASH,
65  typename KeyEqual = std::equal_to<Key> >
66  class hopscotch_map {
67 
68  public:
69  // public typedefs
70  typedef Key key_type;
71  typedef std::pair<Key, Value> value_type;
72  typedef Value mapped_type;
73  typedef size_t size_type;
74  typedef Hash hasher;
75  typedef KeyEqual equality_function;
76  typedef value_type* pointer;
77  typedef value_type& reference;
78  typedef const value_type* const_pointer;
79  typedef const value_type& const_reference;
80 
81 
82  typedef std::pair<Key, Value> storage_type;
83 
84  struct hash_redirect{
85  Hash hashfun;
86  hash_redirect(Hash h): hashfun(h) { }
87  size_t operator()(const storage_type& v) const {
88  return hashfun(v.first);
89  }
90  };
91  struct key_equal_redirect{
92  KeyEqual keyeq;
93  key_equal_redirect(KeyEqual k): keyeq(k) { }
94  bool operator()(const storage_type& v, const storage_type& v2) const {
95  return keyeq(v.first, v2.first);
96  }
97  };
98 
99  typedef hopscotch_table<storage_type,
100  Synchronized,
101  hash_redirect,
102  key_equal_redirect> container_type;
103 
104  typedef typename container_type::iterator iterator;
106 
107  private:
108 
109 
110  // The primary storage. Used by all sequential accessors.
111  container_type* container;
112  spinrwlock2 lock;
113 
114  // the hash function to use. hashes a pair<key, value> to hash(key)
115  hash_redirect hashfun;
116 
117  // the equality function to use. Tests equality on only the first
118  // element of the pair
119  key_equal_redirect equalfun;
120 
121  container_type* create_new_container(size_t size) {
122  return new container_type(size, hashfun, equalfun);
123  }
124 
125  void destroy_all() {
126  delete container;
127  container = NULL;
128  }
129 
130  // rehashes the hash table to one which is double the size
131  container_type* rehash_to_new_container(size_t newsize = (size_t)(-1)) {
132  /*
133  std::cerr << "Rehash at " << container->size() << "/"
134  << container->capacity() << ": "
135  << container->load_factor() << std::endl;
136  */
137  // rehash
138  if (newsize == (size_t)(-1)) newsize = container->size() * 2;
139  container_type* newcontainer = create_new_container(newsize);
140  const_iterator citer = begin();
141  while (citer != end()) {
142  assert(newcontainer->insert(*citer) != newcontainer->end());
143  ++citer;
144  }
145  return newcontainer;
146  }
147 
148  // Inserts a value into the hash table. This does not check
149  // if the key already exists, and may produce duplicate values.
150  iterator do_insert(const value_type &v) {
151  iterator iter = container->insert(v);
152 
153  if (iter != container->end()) {
154  return iter;
155  }
156  else {
157  container_type* newcontainer = rehash_to_new_container();
158  iter = newcontainer->insert(v);
159  assert(iter != newcontainer->end());
160  std::swap(container, newcontainer);
161  delete newcontainer;
162  return iter;
163  }
164  }
165 
166  public:
167 
168  hopscotch_map(Hash hashfun = Hash(),
169  KeyEqual equalfun = KeyEqual()):
170  container(NULL),
171  hashfun(hashfun), equalfun(equalfun) {
172  container = create_new_container(32);
173  }
174 
175  hopscotch_map(const hopscotch_map& h):
176  hashfun(h.hashfun), equalfun(h.equalfun) {
177  container = create_new_container(h.capacity());
178  (*container) = *(h.container);
179  }
180 
181  // only increases
182  void rehash(size_t s) {
183  if (s > capacity()) {
184  container_type* newcontainer = rehash_to_new_container(s);
185  std::swap(container, newcontainer);
186  delete newcontainer;
187  }
188  }
189 
190  ~hopscotch_map() {
191  destroy_all();
192  }
193 
194  hasher hash_function() const {
195  return hashfun.hashfun;
196  }
197 
198  KeyEqual key_eq() const {
199  return equalfun.equalfun;
200  }
201 
202  hopscotch_map& operator=(const hopscotch_map& other) {
203  (*container) = *(other.container);
204  hashfun = other.hashfun;
205  equalfun = other.equalfun;
206  return *this;
207  }
208 
209  size_type size() const {
210  return container->size();
211  }
212 
213  iterator begin() {
214  return container->begin();
215  }
216 
217  iterator end() {
218  return container->end();
219  }
220 
221 
222  const_iterator begin() const {
223  return container->begin();
224  }
225 
226  const_iterator end() const {
227  return container->end();
228  }
229 
230 
231  std::pair<iterator, bool> insert(const value_type& v) {
232  iterator i = find(v.first);
233  if (i != end()) return std::make_pair(i, false);
234  else return std::make_pair(do_insert(v), true);
235  }
236 
237 
238  iterator insert(const_iterator hint, const value_type& v) {
239  return insert(v).first;
240  }
241 
242  iterator find(key_type const& k) {
243  value_type v(k, mapped_type());
244  return container->find(v);
245  }
246 
247  const_iterator find(key_type const& k) const {
248  value_type v(k, mapped_type());
249  return container->find(v);
250  }
251 
252  size_t count(key_type const& k) const {
253  value_type v(k, mapped_type());
254  return container->count(v);
255  }
256 
257 
258  bool erase(iterator iter) {
259  return container->erase(iter);
260  }
261 
262  bool erase(key_type const& k) {
263  value_type v(k, mapped_type());
264  return container->erase(v);
265  }
266 
267  void swap(hopscotch_map& other) {
268  std::swap(container, other.container);
269  std::swap(hashfun, other.hashfun);
270  std::swap(equalfun, other.equalfun);
271  }
272 
273  mapped_type& operator[](const key_type& i) {
274  iterator iter = find(i);
275  value_type tmp(i, mapped_type());
276  if (iter == end()) iter = do_insert(tmp);
277  return iter->second;
278  }
279 
280  void clear() {
281  destroy_all();
282  container = create_new_container(128);
283  }
284 
285 
286  size_t capacity() const {
287  return container->capacity();
288  }
289 
290 
291  float load_factor() const {
292  return container->load_factor();
293  }
294 
295  void save(oarchive &oarc) const {
296  oarc << size() << capacity();
297  const_iterator iter = begin();
298  while (iter != end()) {
299  oarc << (*iter);
300  ++iter;
301  }
302  }
303 
304 
305  void load(iarchive &iarc) {
306  size_t s, c;
307  iarc >> s >> c;
308  if (capacity() != c) {
309  destroy_all();
310  container = create_new_container(c);
311  }
312  else {
313  container->clear();
314  }
315  for (size_t i = 0;i < s; ++i) {
316  value_type v;
317  iarc >> v;
318  insert(v);
319  }
320  }
321 
322  void put_sync(const value_type &v) {
323  if (!Synchronized) {
324  (*this)[v.first] = v.second;
325  return;
326  }
327  lock.readlock();
328  // try to insert into the container
329  if (container->put_sync(v)) {
330  // success!
331  lock.rdunlock();
332  }
333  else {
334  // fail!
335  // I may need to rehash
336  lock.rdunlock();
337  // lets get an exclusive lock and try again
338  lock.writelock();
339  // I now have exclusive, I can use the unsynchronized accessors
340  if (container->insert(v) != container->end()) {
341  // success now
342  lock.wrunlock();
343  }
344  else {
345  // rehash
346  container_type* newcontainer = rehash_to_new_container();
347  // we much succeed now!
348  assert(newcontainer->insert(v) != newcontainer->end());
349  std::swap(container, newcontainer);
350  lock.wrunlock();
351  delete newcontainer;
352  }
353  }
354  }
355 
356  void put_sync(const Key& k, const Value& v) {
357  put_sync(std::make_pair(k, v));
358  }
359 
360  std::pair<bool, Value> get_sync(const Key& k) const {
361  if (!Synchronized) {
362  const_iterator iter = find(k);
363  return std::make_pair(iter == end(), iter->second);
364  }
365 
366  lock.readlock();
367  std::pair<bool, value_type> v =
368  container->get_sync(std::make_pair(k, mapped_type()));
369  lock.rdunlock();
370  return std::make_pair(v.first, v.second.second);
371  }
372 
373  bool erase_sync(const Key& k) {
374  if (!Synchronized) return erase(k);
375  lock.readlock();
376  bool ret = container->erase_sync(k);
377  lock.rdunlock();
378  return ret;
379  }
380 
381  };
382 
383 }; // end of graphlab namespace
384 
385 #endif