23 #ifndef GRAPHLAB_UTIL_CUCKOO_MAP_POW2_HPP
24 #define GRAPHLAB_UTIL_CUCKOO_MAP_POW2_HPP
28 #include <boost/random.hpp>
29 #include <boost/unordered_map.hpp>
31 #include <graphlab/serialization/serialization_includes.hpp>
43 template <
typename Key,
typename Value,
45 typename IndexType = size_t,
46 typename Hash = boost::hash<Key>,
47 typename Pred = std::equal_to<Key> >
53 typedef std::pair<Key const, Value> value_type;
54 typedef Value mapped_type;
56 typedef Pred key_equal;
57 typedef IndexType index_type;
58 typedef value_type* pointer;
59 typedef value_type& reference;
60 typedef const value_type* const_pointer;
61 typedef const value_type& const_reference;
65 typedef std::pair<key_type, mapped_type> non_const_value_type;
66 typedef value_type* map_container_type;
67 typedef value_type* map_container_iterator;
68 typedef const value_type* map_container_const_iterator;
69 typedef boost::unordered_map<Key, Value, Hash, Pred> stash_container_type;
74 map_container_type data;
76 stash_container_type stash;
78 boost::uniform_int<index_type> kranddist;
83 map_container_iterator data_begin() {
87 map_container_iterator data_end() {
88 return data + datalen;
91 map_container_const_iterator data_begin()
const {
95 map_container_const_iterator data_end()
const {
96 return data + datalen;
101 void replace_in_vector(map_container_iterator iter,
103 const mapped_type& val) {
107 new(iter) value_type(key, val);
112 for(
size_t i = 0; i < datalen; ++i) {
113 data[i].~value_type();
123 struct insert_iterator{
125 typedef std::forward_iterator_tag iterator_category;
126 typedef typename cuckoo_map_pow2::value_type value_type;
130 insert_iterator operator++() {
133 insert_iterator operator++(
int) {
137 insert_iterator& operator*() {
140 insert_iterator& operator=(
const insert_iterator& i) {
145 insert_iterator& operator=(
const value_type& v) {
151 struct const_iterator {
154 typename cuckoo_map_pow2::map_container_const_iterator vec_iter;
155 typename cuckoo_map_pow2::stash_container_type::const_iterator stash_iter;
157 typedef std::forward_iterator_tag iterator_category;
158 typedef typename cuckoo_map_pow2::value_type value_type;
159 typedef size_t difference_type;
160 typedef const value_type* pointer;
161 typedef const value_type& reference;
165 const_iterator(): cmap(NULL), in_stash(
false) {}
167 const_iterator operator++() {
172 while(vec_iter != cmap->data_end() &&
173 cmap->key_eq()(vec_iter->first, cmap->illegal_key())) ++vec_iter;
174 if (vec_iter == cmap->data_end()) {
176 stash_iter = cmap->stash.begin();
180 if (stash_iter != cmap->stash.end()) ++stash_iter;
185 const_iterator operator++(
int) {
186 const_iterator cur = *
this;
192 reference operator*() {
193 if (!in_stash)
return *vec_iter;
194 else return *stash_iter;
197 pointer operator->() {
198 if (!in_stash)
return &(*vec_iter);
199 else return &(*stash_iter);
202 bool operator==(
const const_iterator iter)
const {
203 return in_stash == iter.in_stash &&
205 vec_iter == iter.vec_iter :
206 stash_iter == iter.stash_iter);
209 bool operator!=(
const const_iterator iter)
const {
210 return !((*this) == iter);
214 const_iterator(
const cuckoo_map_pow2* cmap,
typename cuckoo_map_pow2::map_container_const_iterator vec_iter):
215 cmap(cmap), in_stash(
false), vec_iter(vec_iter), stash_iter(cmap->stash.begin()) { }
217 const_iterator(
const cuckoo_map_pow2* cmap,
typename cuckoo_map_pow2::stash_container_type::const_iterator stash_iter):
218 cmap(cmap), in_stash(
true), vec_iter(cmap->data_begin()), stash_iter(stash_iter) { }
226 typename cuckoo_map_pow2::map_container_iterator vec_iter;
227 typename cuckoo_map_pow2::stash_container_type::iterator stash_iter;
229 typedef std::forward_iterator_tag iterator_category;
230 typedef typename cuckoo_map_pow2::value_type value_type;
231 typedef size_t difference_type;
232 typedef value_type* pointer;
233 typedef value_type& reference;
237 iterator(): cmap(NULL), in_stash(
false) {}
240 operator const_iterator()
const {
243 iter.in_stash = in_stash;
244 iter.vec_iter = vec_iter;
245 iter.stash_iter = stash_iter;
249 iterator operator++() {
254 while(vec_iter != cmap->data_end() &&
255 cmap->key_eq()(vec_iter->first, cmap->illegal_key())) ++vec_iter;
256 if (vec_iter == cmap->data_end()) {
258 stash_iter = cmap->stash.begin();
262 if (stash_iter != cmap->stash.end()) ++stash_iter;
267 iterator operator++(
int) {
268 iterator cur = *
this;
274 reference operator*() {
275 if (!in_stash)
return *vec_iter;
276 else return *stash_iter;
279 pointer operator->() {
280 if (!in_stash)
return &(*vec_iter);
281 else return &(*stash_iter);
284 bool operator==(
const iterator iter)
const {
285 return in_stash == iter.in_stash &&
287 vec_iter == iter.vec_iter :
288 stash_iter == iter.stash_iter);
291 bool operator!=(
const iterator iter)
const {
292 return !((*this) == iter);
298 typename cuckoo_map_pow2::map_container_iterator vec_iter):
299 cmap(cmap), in_stash(
false), vec_iter(vec_iter) { }
302 typename cuckoo_map_pow2::stash_container_type::iterator stash_iter):
303 cmap(cmap), in_stash(
true), stash_iter(stash_iter) { }
313 iterator do_insert(
const value_type& v_) {
314 non_const_value_type v(v_.first, v_.second);
315 if (stash.size() > maxstash) {
317 reserve(datalen * 2);
320 index_type insertpos = (index_type)(-1);
325 for (
int i = 0;i < 100; ++i) {
329 size_t hash_of_k = hashfun(v.first);
330 for (
size_t j = 0; j < CuckooK; ++j) {
331 idx = compute_hash(hash_of_k, j);
332 if (keyeq(data[idx].first, illegalkey)) {
337 if (!found) idx = compute_hash(hash_of_k, kranddist(drng));
343 if (insertpos == (index_type)(-1)) insertpos = idx;
344 else if (insertpos == idx) insertpos = (index_type)(-1);
346 if (found || keyeq(data[idx].first, illegalkey)) {
347 replace_in_vector(data_begin() + idx, v.first, v.second);
349 return iterator(
this, data_begin() + insertpos);
354 non_const_value_type tmp = data[idx];
355 replace_in_vector(data_begin() + idx, v.first, v.second);
361 typename stash_container_type::iterator stashiter = stash.insert(v).first;
363 if (insertpos == (index_type)(-1)) {
364 return iterator(
this, stashiter);
367 return iterator(
this, data_begin() + insertpos);
373 index_type stashsize = 8,
374 hasher
const& h = hasher(),
375 key_equal
const& k = key_equal()):
376 illegalkey(illegalkey),
377 numel(0),maxstash(stashsize),
378 data(NULL), datalen(0),
380 kranddist(0, CuckooK - 1), hashfun(h), keyeq(k), mask(127) {
381 stash.max_load_factor(1.0);
385 const key_type& illegal_key()
const {
396 data = (map_container_type)malloc(
sizeof(value_type) * other.datalen);
397 datalen = other.datalen;
398 std::uninitialized_copy(other.data_begin(), other.data_end(), data_begin());
404 illegalkey = other.illegalkey;
406 hashfun = other.hashfun;
419 iter.in_stash =
false;
420 iter.vec_iter = data_begin();
422 while(iter.vec_iter != data_end() &&
423 keyeq(iter.vec_iter->first, illegalkey)) ++iter.vec_iter;
425 if (iter.vec_iter == data_end()) {
426 iter.in_stash =
true;
427 iter.stash_iter = stash.begin();
433 return iterator(
this, stash.end());
437 const_iterator begin()
const {
440 iter.in_stash =
false;
441 iter.vec_iter = data_begin();
443 while(iter.vec_iter != data_end() &&
444 keyeq(iter.vec_iter->first, illegalkey)) ++iter.vec_iter;
446 if (iter.vec_iter == data_end()) {
447 iter.in_stash =
true;
448 iter.stash_iter = stash.begin();
454 const_iterator end()
const {
455 return const_iterator(
this, stash.end());
463 static size_t mix(
size_t state) {
464 state += (state << 12);
465 state ^= (state >> 22);
466 state += (state << 4);
467 state ^= (state >> 9);
468 state += (state << 10);
469 state ^= (state >> 2);
470 state += (state << 7);
471 state ^= (state >> 12);
475 index_type compute_hash(
size_t k ,
const uint32_t
seed)
const {
477 #if (__SIZEOF_PTRDIFF_T__ == 8)
478 static const size_t a[8] = {0x6306AA9DFC13C8E7,
487 static const size_t a[8] = {0xFC13C8E7,
496 index_type s = mix(a[seed] ^ k);
501 stash_container_type stmp;
504 numel -= stmp.size();
505 for (
size_t i = 0;i < datalen; ++i) {
507 if (!keyeq(data[i].first, illegalkey)) {
508 if (count(data[i].first))
continue;
509 non_const_value_type v = data[i];
510 replace_in_vector(data_begin() + i, illegalkey, mapped_type());
516 typename stash_container_type::const_iterator iter = stmp.begin();
517 while(iter != stmp.end()) {
523 static uint64_t next_powerof2(uint64_t val) {
525 val = val | (val >> 1);
526 val = val | (val >> 2);
527 val = val | (val >> 4);
528 val = val | (val >> 8);
529 val = val | (val >> 16);
530 val = val | (val >> 32);
535 void reserve(
size_t newlen) {
536 newlen = next_powerof2(newlen);
540 data = (map_container_type)realloc(data, newlen *
sizeof(value_type));
541 std::uninitialized_fill(data_end(), data+newlen, non_const_value_type(illegalkey, mapped_type()));
546 std::pair<iterator, bool> insert(
const value_type& v_) {
547 iterator i = find(v_.first);
548 if (i != end())
return std::make_pair(i,
false);
549 else return std::make_pair(do_insert(v_),
true);
552 iterator insert(const_iterator hint, value_type
const& v) {
553 return insert(v).first;
556 iterator find(key_type
const& k) {
557 size_t hash_of_k = hashfun(k);
558 for (uint32_t i = 0;i < CuckooK; ++i) {
559 index_type idx = compute_hash(hash_of_k, i);
560 if (keyeq(data[idx].first, k))
return iterator(
this, data_begin() + idx);
562 return iterator(
this, stash.find(k));
565 const_iterator find(key_type
const& k)
const {
566 size_t hash_of_k = hashfun(k);
567 for (uint32_t i = 0;i < CuckooK; ++i) {
568 index_type idx = compute_hash(hash_of_k, i);
569 if (keyeq(data[idx].first, k))
return const_iterator(
this, data_begin() + idx);
571 return const_iterator(
this, stash.find(k));
574 size_t count(key_type
const& k)
const {
575 size_t hash_of_k = hashfun(k);
576 for (uint32_t i = 0;i < CuckooK; ++i) {
577 index_type idx = compute_hash(hash_of_k, i);
578 if (keyeq(data[idx].first, k))
return true;
580 return stash.count(k);
584 void erase(iterator iter) {
585 if (iter.in_stash ==
false) {
586 if (!keyeq(iter.vec_iter->first, illegalkey)) {
588 replace_in_vector(&(*(iter.vec_iter)), illegalkey, mapped_type());
593 else if (iter.stash_iter != stash.end()) {
595 stash.erase(iter.stash_iter);
599 void erase(key_type
const& k) {
600 iterator iter = find(k);
601 if (iter != end()) erase(iter);
605 std::swap(illegalkey, other.illegalkey);
606 std::swap(numel, other.numel);
607 std::swap(maxstash, other.maxstash);
608 std::swap(data, other.data);
609 std::swap(datalen, other.datalen);
610 std::swap(stash, other.stash);
611 std::swap(drng, other.drng);
612 std::swap(kranddist, other.kranddist);
613 std::swap(hashfun, other.hashfun);
614 std::swap(keyeq, other.keyeq);
615 std::swap(mask, other.mask);
618 mapped_type& operator[](
const key_type& i) {
619 iterator iter = find(i);
620 value_type tmp(i, mapped_type());
621 if (iter == end()) iter = do_insert(tmp);
625 key_equal key_eq()
const {
635 float load_factor()
const {
636 return (
float)numel / (datalen + stash.size());
640 oarc << numel << illegalkey;
647 index_type tmpnumel = 0;
648 iarc >> tmpnumel >> illegalkey;
650 reserve(tmpnumel * 1.5);
651 deserialize_iterator<iarchive, non_const_value_type>
652 (iarc, insert_iterator(
this));