23 #ifndef GRAPHLAB_UTIL_CUCKOO_MAP_HPP
24 #define GRAPHLAB_UTIL_CUCKOO_MAP_HPP
28 #include <boost/random.hpp>
29 #include <boost/unordered_map.hpp>
31 #include <graphlab/serialization/serialization_includes.hpp>
44 template <
typename Key,
typename Value,
46 typename IndexType = size_t,
47 typename Hash = boost::hash<Key>,
48 typename Pred = std::equal_to<Key> >
54 typedef std::pair<Key const, Value> value_type;
55 typedef Value mapped_type;
57 typedef Pred key_equal;
58 typedef IndexType index_type;
59 typedef value_type* pointer;
60 typedef value_type& reference;
61 typedef const value_type* const_pointer;
62 typedef const value_type& const_reference;
65 typedef std::pair<key_type, mapped_type> non_const_value_type;
66 typedef value_type* map_container_type;
67 typedef value_type* map_container_iterator;
68 typedef const value_type* map_container_const_iterator;
69 typedef boost::unordered_map<Key, Value, Hash, Pred> stash_container_type;
74 map_container_type data;
76 stash_container_type stash;
78 boost::uniform_int<index_type> kranddist;
82 map_container_iterator data_begin() {
86 map_container_iterator data_end() {
87 return data + datalen;
90 map_container_const_iterator data_begin()
const {
94 map_container_const_iterator data_end()
const {
95 return data + datalen;
98 void replace_in_vector(map_container_iterator iter,
100 const mapped_type& val) {
104 new(iter) value_type(key, val);
109 for(
size_t i = 0; i < datalen; ++i) {
110 data[i].~value_type();
120 struct insert_iterator{
122 typedef std::forward_iterator_tag iterator_category;
123 typedef typename cuckoo_map::value_type value_type;
127 insert_iterator operator++() {
130 insert_iterator operator++(
int) {
134 insert_iterator& operator*() {
137 insert_iterator& operator=(
const insert_iterator& i) {
142 insert_iterator& operator=(
const value_type& v) {
148 struct const_iterator {
151 typename cuckoo_map::map_container_const_iterator vec_iter;
152 typename cuckoo_map::stash_container_type::const_iterator stash_iter;
154 typedef std::forward_iterator_tag iterator_category;
155 typedef typename cuckoo_map::value_type value_type;
156 typedef size_t difference_type;
157 typedef const value_type* pointer;
158 typedef const value_type& reference;
162 const_iterator(): cmap(NULL), in_stash(
false) {}
164 const_iterator operator++() {
169 while(vec_iter != cmap->data_end() &&
170 cmap->key_eq()(vec_iter->first, cmap->illegal_key())) ++vec_iter;
171 if (vec_iter == cmap->data_end()) {
173 stash_iter = cmap->stash.begin();
177 if (stash_iter != cmap->stash.end()) ++stash_iter;
182 const_iterator operator++(
int) {
183 const_iterator cur = *
this;
189 reference operator*() {
190 if (!in_stash)
return *vec_iter;
191 else return *stash_iter;
194 pointer operator->() {
195 if (!in_stash)
return &(*vec_iter);
196 else return &(*stash_iter);
199 bool operator==(
const const_iterator iter)
const {
200 return in_stash == iter.in_stash &&
202 vec_iter == iter.vec_iter :
203 stash_iter == iter.stash_iter);
206 bool operator!=(
const const_iterator iter)
const {
207 return !((*this) == iter);
211 const_iterator(
const cuckoo_map* cmap,
typename cuckoo_map::map_container_const_iterator vec_iter):
212 cmap(cmap), in_stash(
false), vec_iter(vec_iter), stash_iter(cmap->stash.begin()) { }
214 const_iterator(
const cuckoo_map* cmap,
typename cuckoo_map::stash_container_type::const_iterator stash_iter):
215 cmap(cmap), in_stash(
true), vec_iter(cmap->data_begin()), stash_iter(stash_iter) { }
222 typename cuckoo_map::map_container_iterator vec_iter;
223 typename cuckoo_map::stash_container_type::iterator stash_iter;
225 typedef std::forward_iterator_tag iterator_category;
226 typedef typename cuckoo_map::value_type value_type;
227 typedef size_t difference_type;
228 typedef value_type* pointer;
229 typedef value_type& reference;
233 iterator(): cmap(NULL), in_stash(
false) {}
236 operator const_iterator()
const {
239 iter.in_stash = in_stash;
240 iter.vec_iter = vec_iter;
241 iter.stash_iter = stash_iter;
245 iterator operator++() {
250 while(vec_iter != cmap->data_end() &&
251 cmap->key_eq()(vec_iter->first, cmap->illegal_key())) ++vec_iter;
252 if (vec_iter == cmap->data_end()) {
254 stash_iter = cmap->stash.begin();
258 if (stash_iter != cmap->stash.end()) ++stash_iter;
263 iterator operator++(
int) {
264 iterator cur = *
this;
270 reference operator*() {
271 if (!in_stash)
return *vec_iter;
272 else return *stash_iter;
275 pointer operator->() {
276 if (!in_stash)
return &(*vec_iter);
277 else return &(*stash_iter);
280 bool operator==(
const iterator iter)
const {
281 return in_stash == iter.in_stash &&
283 vec_iter == iter.vec_iter :
284 stash_iter == iter.stash_iter);
287 bool operator!=(
const iterator iter)
const {
288 return !((*this) == iter);
293 iterator(
cuckoo_map* cmap,
typename cuckoo_map::map_container_iterator vec_iter):
294 cmap(cmap), in_stash(
false), vec_iter(vec_iter) { }
296 iterator(
cuckoo_map* cmap,
typename cuckoo_map::stash_container_type::iterator stash_iter):
297 cmap(cmap), in_stash(
true), stash_iter(stash_iter) { }
306 iterator do_insert(
const value_type& v_) {
307 non_const_value_type v(v_.first, v_.second);
308 if (stash.size() > maxstash) {
310 reserve(datalen * 1.5);
313 index_type insertpos = (index_type)(-1);
318 for (
int i = 0;i < 100; ++i) {
322 size_t hash_of_k = hashfun(v.first);
323 for (
size_t j = 0; j < CuckooK; ++j) {
324 idx = compute_hash(hash_of_k, j);
325 if (keyeq(data[idx].first, illegalkey)) {
330 if (!found) idx = compute_hash(hash_of_k, kranddist(drng));
336 if (insertpos == (index_type)(-1)) insertpos = idx;
337 else if (insertpos == idx) insertpos = (index_type)(-1);
339 if (found || keyeq(data[idx].first, illegalkey)) {
340 replace_in_vector(data_begin() + idx, v.first, v.second);
342 return iterator(
this, data_begin() + insertpos);
347 non_const_value_type tmp = data[idx];
348 replace_in_vector(data_begin() + idx, v.first, v.second);
354 typename stash_container_type::iterator stashiter = stash.insert(v).first;
356 if (insertpos == (index_type)(-1)) {
357 return iterator(
this, stashiter);
360 return iterator(
this, data_begin() + insertpos);
367 index_type stashsize = 8,
368 hasher
const& h = hasher(),
369 key_equal
const& k = key_equal()):
370 illegalkey(illegalkey),
371 numel(0),maxstash(stashsize),
372 data(NULL), datalen(0),
374 kranddist(0, CuckooK - 1), hashfun(h), keyeq(k) {
375 stash.max_load_factor(1.0);
384 data = (map_container_type)malloc(
sizeof(value_type) * other.datalen);
385 datalen = other.datalen;
386 std::uninitialized_copy(other.data_begin(), other.data_end(), data_begin());
392 illegalkey = other.illegalkey;
394 hashfun = other.hashfun;
399 const key_type& illegal_key()
const {
414 iter.in_stash =
false;
415 iter.vec_iter = data_begin();
417 while(iter.vec_iter != data_end() &&
418 keyeq(iter.vec_iter->first, illegalkey)) ++iter.vec_iter;
421 if (iter.vec_iter == data_end()) {
422 iter.in_stash =
true;
423 iter.stash_iter = stash.begin();
430 return iterator(
this, stash.end());
434 const_iterator begin()
const {
437 iter.in_stash =
false;
438 iter.vec_iter = data_begin();
440 while(iter.vec_iter != data_end() &&
441 keyeq(iter.vec_iter->first, illegalkey)) ++iter.vec_iter;
443 if (iter.vec_iter == data_end()) {
444 iter.in_stash =
true;
445 iter.stash_iter = stash.begin();
451 const_iterator end()
const {
452 return const_iterator(
this, stash.end());
460 static size_t mix(
size_t state) {
461 state += (state << 12);
462 state ^= (state >> 22);
463 state += (state << 4);
464 state ^= (state >> 9);
465 state += (state << 10);
466 state ^= (state >> 2);
467 state += (state << 7);
468 state ^= (state >> 12);
472 index_type compute_hash(
size_t k ,
const uint32_t
seed)
const {
474 #if (__SIZEOF_PTRDIFF_T__ == 8)
475 static const size_t a[8] = {0x6306AA9DFC13C8E7,
484 static const size_t a[8] = {0xFC13C8E7,
494 index_type s = mix(a[seed] ^ k);
499 stash_container_type stmp;
502 numel -= stmp.size();
503 for (
size_t i = 0;i < datalen; ++i) {
505 if (!keyeq(data[i].first, illegalkey)) {
506 if (count(data[i].first))
continue;
507 non_const_value_type v = data[i];
508 replace_in_vector(data_begin() + i, illegalkey, mapped_type());
514 typename stash_container_type::const_iterator iter = stmp.begin();
515 while(iter != stmp.end()) {
523 void reserve(
size_t newlen) {
526 data = (map_container_type)realloc(data, newlen *
sizeof(value_type));
527 std::uninitialized_fill(data_end(), data+newlen, non_const_value_type(illegalkey, mapped_type()));
532 std::pair<iterator, bool> insert(
const value_type& v_) {
533 iterator i = find(v_.first);
534 if (i != end())
return std::make_pair(i,
false);
535 else return std::make_pair(do_insert(v_),
true);
540 iterator insert(const_iterator hint, value_type
const& v) {
541 return insert(v).first;
544 iterator find(key_type
const& k) {
545 size_t hash_of_k = hashfun(k);
546 for (uint32_t i = 0;i < CuckooK; ++i) {
547 index_type idx = compute_hash(hash_of_k, i);
548 if (keyeq(data[idx].first, k))
return iterator(
this, data_begin() + idx);
550 return iterator(
this, stash.find(k));
553 const_iterator find(key_type
const& k)
const {
554 size_t hash_of_k = hashfun(k);
555 for (uint32_t i = 0;i < CuckooK; ++i) {
556 index_type idx = compute_hash(hash_of_k, i);
557 if (keyeq(data[idx].first, k))
return const_iterator(
this, data_begin() + idx);
559 return const_iterator(
this, stash.find(k));
562 size_t count(key_type
const& k)
const {
563 size_t hash_of_k = hashfun(k);
564 for (uint32_t i = 0;i < CuckooK; ++i) {
565 index_type idx = compute_hash(hash_of_k, i);
566 if (keyeq(data[idx].first, k))
return true;
568 return stash.count(k);
572 void erase(iterator iter) {
573 if (iter.in_stash ==
false) {
574 if (!keyeq(iter.vec_iter->first, illegalkey)) {
576 replace_in_vector(&(*(iter.vec_iter)), illegalkey, mapped_type());
581 else if (iter.stash_iter != stash.end()) {
583 stash.erase(iter.stash_iter);
587 void erase(key_type
const& k) {
588 iterator iter = find(k);
589 if (iter != end()) erase(iter);
593 std::swap(illegalkey, other.illegalkey);
594 std::swap(numel, other.numel);
595 std::swap(maxstash, other.maxstash);
596 std::swap(data, other.data);
597 std::swap(datalen, other.datalen);
598 std::swap(stash, other.stash);
599 std::swap(drng, other.drng);
600 std::swap(kranddist, other.kranddist);
601 std::swap(hashfun, other.hashfun);
602 std::swap(keyeq, other.keyeq);
605 mapped_type& operator[](
const key_type& i) {
606 iterator iter = find(i);
607 value_type tmp(i, mapped_type());
608 if (iter == end()) iter = do_insert(tmp);
612 key_equal key_eq()
const {
622 float load_factor()
const {
623 return (
float)numel / (datalen + stash.size());
627 oarc << numel << illegalkey;
635 iarc >> tmpnumel >> illegalkey;
636 reserve(tmpnumel * 1.5);
637 deserialize_iterator<iarchive, non_const_value_type>
638 (iarc, insert_iterator(
this));