GraphLab: Distributed Graph-Parallel API  2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hopscotch_set.hpp
1 /**
2  * Copyright (c) 2009 Carnegie Mellon University.
3  *
4  * All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an "AS
14  * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
15  * express or implied. See the License for the specific language
16  * governing permissions and limitations under the License.
17  *
18  * For more about this software visit:
19  *
20  * http://www.graphlab.ml.cmu.edu
21  *
22  */
23 
24 #ifndef GRAPHLAB_UTIL_HOPSCOTCH_SET_HPP
25 #define GRAPHLAB_UTIL_HOPSCOTCH_SET_HPP
26 
27 #include <graphlab/util/hopscotch_table.hpp>
28 #include <graphlab/parallel/pthread_tools.hpp>
29 #include <graphlab/parallel/atomic.hpp>
30 
31 #include <graphlab/serialization/serialization_includes.hpp>
32 
33 
34 #include <boost/functional/hash.hpp>
35 #define _HOPSCOTCH_SET_DEFAULT_HASH boost::hash<Key>
36 
37 
38 
39 namespace graphlab {
40 
41 
42 
43  /**
44  * A hopscotch hash set. More or less similar
45  * interface as boost::unordered_set, not necessarily
46  * entirely STL compliant.
47  * Really should only be used to store small keys and trivial values.
48  *
49  * \tparam Key The key of the set
50  * \tparam Synchronized Defaults to True. If True, locking is used to ensure
51  * safe reads and writes to the hash table.
52  * Even under "Synchronized", the only operations
53  * which are safe for parallel access are all functions
54  * suffixed with "sync".
55  * \tparam Hash The hash functor type. Defaults to std::hash<Key> if C++11 is
56  * available. Otherwise defaults to boost::hash<Key>
57  * \tparam KeyEqual The functor used to identify object equality. Defaults to
58  * std::equal_to<Key>
59  */
60  template <typename Key,
61  bool Synchronized = true,
62  typename Hash = _HOPSCOTCH_SET_DEFAULT_HASH,
63  typename KeyEqual = std::equal_to<Key> >
64  class hopscotch_set {
65 
66  public:
67  // public typedefs
68  typedef Key value_type;
69  typedef size_t size_type;
70  typedef Hash hasher;
71  typedef KeyEqual equality_function;
72  typedef value_type* pointer;
73  typedef value_type& reference;
74  typedef const value_type* const_pointer;
75  typedef const value_type& const_reference;
76 
77 
78  typedef Key storage_type;
79 
80  typedef hopscotch_table<storage_type,
81  Synchronized,
82  Hash,
83  KeyEqual> container_type;
84 
85  typedef typename container_type::iterator iterator;
87 
88  private:
89 
90 
91  // The primary storage. Used by all sequential accessors.
92  container_type* container;
93  spinrwlock lock;
94 
95  // the hash function to use. hashes a pair<key, value> to hash(key)
96  hasher hashfun;
97 
98  // the equality function to use. Tests equality on only the first
99  // element of the pair
100  equality_function equalfun;
101 
102  container_type* create_new_container(size_t size) {
103  return new container_type(size, hashfun, equalfun);
104  }
105 
106  void destroy_all() {
107  delete container;
108  container = NULL;
109  }
110 
111  // rehashes the hash table to one which is double the size
112  container_type* rehash_to_new_container(size_t newsize = (size_t)(-1)) {
113  /*
114  std::cerr << "Rehash at " << container->size() << "/"
115  << container->capacity() << ": "
116  << container->load_factor() << std::endl;
117  */
118  // rehash
119  if (newsize == (size_t)(-1)) newsize = container->size() * 2;
120  container_type* newcontainer = create_new_container(newsize);
121  const_iterator citer = begin();
122  while (citer != end()) {
123  assert(newcontainer->insert(*citer) != newcontainer->end());
124  ++citer;
125  }
126  return newcontainer;
127  }
128 
129  // Inserts a value into the hash table. This does not check
130  // if the key already exists, and may produce duplicate values.
131  iterator do_insert(const value_type &v) {
132  iterator iter = container->insert(v);
133 
134  if (iter != container->end()) {
135  return iter;
136  }
137  else {
138  container_type* newcontainer = rehash_to_new_container();
139  iter = newcontainer->insert(v);
140  assert(iter != newcontainer->end());
141  std::swap(container, newcontainer);
142  delete newcontainer;
143  return iter;
144  }
145  }
146 
147  public:
148 
149  hopscotch_set(size_t initialsize = 32,
150  Hash hashfun = Hash(),
151  KeyEqual equalfun = KeyEqual()):
152  container(NULL),
153  hashfun(hashfun), equalfun(equalfun) {
154  container = create_new_container(initialsize);
155  }
156 
157  hopscotch_set(const hopscotch_set& h):
158  hashfun(h.hashfun), equalfun(h.equalfun) {
159  container = create_new_container(h.capacity());
160  (*container) = *(h.container);
161  }
162 
163 
164  // only increases
165  void rehash(size_t s) {
166  if (s > capacity()) {
167  container_type* newcontainer = rehash_to_new_container(s);
168  std::swap(container, newcontainer);
169  delete newcontainer;
170  }
171  }
172 
173  ~hopscotch_set() {
174  destroy_all();
175  }
176 
177  hasher hash_function() const {
178  return hashfun;
179  }
180 
181  KeyEqual key_eq() const {
182  return equalfun;
183  }
184 
185  hopscotch_set& operator=(const hopscotch_set& other) {
186  (*container) = *(other.container);
187  hashfun = other.hashfun;
188  equalfun = other.equalfun;
189  return *this;
190  }
191 
192  size_type size() const {
193  return container->size();
194  }
195 
196  iterator begin() {
197  return container->begin();
198  }
199 
200  iterator end() {
201  return container->end();
202  }
203 
204 
205  const_iterator begin() const {
206  return container->begin();
207  }
208 
209  const_iterator end() const {
210  return container->end();
211  }
212 
213 
214  std::pair<iterator, bool> insert(const value_type& v) {
215  iterator i = find(v);
216  if (i != end()) return std::make_pair(i, false);
217  else return std::make_pair(do_insert(v), true);
218  }
219 
220 
221  iterator insert(const_iterator hint, const value_type& v) {
222  return insert(v).first;
223  }
224 
225  iterator find(value_type const& v) {
226  return container->find(v);
227  }
228 
229  const_iterator find(value_type const& v) const {
230  return container->find(v);
231  }
232 
233  size_t count(value_type const& v) const {
234  return container->count(v);
235  }
236 
237 
238  bool erase(iterator iter) {
239  return container->erase(iter);
240  }
241 
242  bool erase(value_type const& v) {
243  return container->erase(v);
244  }
245 
246  void swap(hopscotch_set& other) {
247  std::swap(container, other.container);
248  std::swap(hashfun, other.hashfun);
249  std::swap(equalfun, other.equalfun);
250  }
251 
252  void clear() {
253  destroy_all();
254  container = create_new_container(128);
255  }
256 
257 
258  size_t capacity() const {
259  return container->capacity();
260  }
261 
262 
263  float load_factor() const {
264  return container->load_factor();
265  }
266 
267  void save(oarchive &oarc) const {
268  oarc << size() << capacity();
269  const_iterator iter = begin();
270  while (iter != end()) {
271  oarc << (*iter);
272  ++iter;
273  }
274  }
275 
276 
277  void load(iarchive &iarc) {
278  size_t s, c;
279  iarc >> s >> c;
280  if (capacity() != c) {
281  destroy_all();
282  container = create_new_container(c);
283  }
284  else {
285  container->clear();
286  }
287  for (size_t i = 0;i < s; ++i) {
288  value_type v;
289  iarc >> v;
290  insert(v);
291  }
292  }
293  };
294 
295 }; // end of graphlab namespace
296 
297 #endif