23 #ifndef GRAPHLAB_UTIL_CUCKOO_SET_POW2_HPP
24 #define GRAPHLAB_UTIL_CUCKOO_SET_POW2_HPP
28 #include <boost/random.hpp>
29 #include <boost/unordered_map.hpp>
31 #include <graphlab/serialization/serialization_includes.hpp>
43 template <
typename Key,
45 typename IndexType = size_t,
46 typename Hash = boost::hash<Key>,
47 typename Pred = std::equal_to<Key> >
53 typedef Key value_type;
55 typedef Pred key_equal;
56 typedef IndexType index_type;
57 typedef value_type* pointer;
58 typedef value_type& reference;
59 typedef const value_type* const_pointer;
60 typedef const value_type& const_reference;
64 typedef key_type non_const_value_type;
65 typedef value_type* map_container_type;
66 typedef value_type* map_container_iterator;
67 typedef const value_type* map_container_const_iterator;
68 typedef std::vector<Key> stash_container_type;
73 map_container_type data;
75 stash_container_type stash;
77 boost::uniform_int<index_type> kranddist;
82 map_container_iterator data_begin() {
86 map_container_iterator data_end() {
87 return data + datalen;
90 map_container_const_iterator data_begin()
const {
94 map_container_const_iterator data_end()
const {
95 return data + datalen;
100 void replace_in_vector(map_container_iterator iter,
101 const key_type& key) {
105 new(iter) value_type(key);
111 for(
size_t i = 0; i < datalen; ++i) {
112 data[i].~value_type();
123 struct insert_iterator{
125 typedef std::forward_iterator_tag iterator_category;
126 typedef typename cuckoo_set_pow2::value_type value_type;
130 insert_iterator operator++() {
133 insert_iterator operator++(
int) {
137 insert_iterator& operator*() {
140 insert_iterator& operator=(
const insert_iterator& i) {
145 insert_iterator& operator=(
const value_type& v) {
151 struct const_iterator {
154 typename cuckoo_set_pow2::map_container_const_iterator vec_iter;
155 typename cuckoo_set_pow2::stash_container_type::const_iterator stash_iter;
157 typedef std::forward_iterator_tag iterator_category;
158 typedef typename cuckoo_set_pow2::value_type value_type;
159 typedef size_t difference_type;
160 typedef const value_type& reference;
161 typedef const value_type* pointer;
164 const_iterator(): cmap(NULL), in_stash(
false) {}
166 const_iterator operator++() {
171 while(vec_iter != cmap->data_end() &&
172 cmap->key_eq()(*vec_iter, cmap->illegal_key())) ++vec_iter;
173 if (vec_iter == cmap->data_end()) {
175 stash_iter = cmap->stash.begin();
179 if (stash_iter != cmap->stash.end()) ++stash_iter;
184 const_iterator operator++(
int) {
185 const_iterator cur = *
this;
191 reference operator*() {
192 if (!in_stash)
return *vec_iter;
193 else return *stash_iter;
196 bool operator==(
const const_iterator iter)
const {
197 return in_stash == iter.in_stash &&
199 vec_iter == iter.vec_iter :
200 stash_iter == iter.stash_iter);
203 bool operator!=(
const const_iterator iter)
const {
204 return !((*this) == iter);
208 const_iterator(
const cuckoo_set_pow2* cmap,
typename cuckoo_set_pow2::map_container_const_iterator vec_iter):
209 cmap(cmap), in_stash(
false), vec_iter(vec_iter), stash_iter(cmap->stash.begin()) { }
211 const_iterator(
const cuckoo_set_pow2* cmap,
typename cuckoo_set_pow2::stash_container_type::const_iterator stash_iter):
212 cmap(cmap), in_stash(
true), vec_iter(cmap->data_begin()), stash_iter(stash_iter) { }
220 typename cuckoo_set_pow2::map_container_iterator vec_iter;
221 typename cuckoo_set_pow2::stash_container_type::iterator stash_iter;
223 typedef std::forward_iterator_tag iterator_category;
224 typedef typename cuckoo_set_pow2::value_type value_type;
225 typedef size_t difference_type;
226 typedef value_type& reference;
227 typedef value_type* pointer;
230 iterator(): cmap(NULL), in_stash(
false) {}
233 operator const_iterator()
const {
236 iter.in_stash = in_stash;
237 iter.vec_iter = vec_iter;
238 iter.stash_iter = stash_iter;
242 iterator operator++() {
247 while(vec_iter != cmap->data_end() &&
248 cmap->key_eq()(*vec_iter, cmap->illegal_key())) ++vec_iter;
249 if (vec_iter == cmap->data_end()) {
251 stash_iter = cmap->stash.begin();
255 if (stash_iter != cmap->stash.end()) ++stash_iter;
260 iterator operator++(
int) {
261 iterator cur = *
this;
267 reference operator*() {
268 if (!in_stash)
return *vec_iter;
269 else return *stash_iter;
272 bool operator==(
const iterator iter)
const {
273 return in_stash == iter.in_stash &&
275 vec_iter == iter.vec_iter :
276 stash_iter == iter.stash_iter);
279 bool operator!=(
const iterator iter)
const {
280 return !((*this) == iter);
286 typename cuckoo_set_pow2::map_container_iterator vec_iter):
287 cmap(cmap), in_stash(
false), vec_iter(vec_iter) { }
290 typename cuckoo_set_pow2::stash_container_type::iterator stash_iter):
291 cmap(cmap), in_stash(
true), stash_iter(stash_iter) { }
301 iterator do_insert(
const value_type& v_) {
302 non_const_value_type v = v_;
303 if (stash.size() > maxstash) {
305 reserve(datalen * 2);
308 index_type insertpos = (index_type)(-1);
313 for (
int i = 0;i < 100; ++i) {
317 size_t hash_of_k = hashfun(v);
318 for (
size_t j = 0; j < CuckooK; ++j) {
319 idx = compute_hash(hash_of_k, j);
320 if (keyeq(data[idx], illegalkey)) {
325 if (!found) idx = compute_hash(hash_of_k, kranddist(drng));
331 if (insertpos == (index_type)(-1)) insertpos = idx;
332 else if (insertpos == idx) insertpos = (index_type)(-1);
334 if (found || keyeq(data[idx], illegalkey)) {
335 replace_in_vector(data_begin() + idx, v);
337 return iterator(
this, data_begin() + insertpos);
342 non_const_value_type tmp = data[idx];
343 replace_in_vector(data_begin() + idx, v);
349 typename stash_container_type::iterator stashiter = stash.insert(stash.end(), v);
351 if (insertpos == (index_type)(-1)) {
352 return iterator(
this, stashiter);
355 return iterator(
this, data_begin() + insertpos);
361 index_type stashsize = 8,
362 index_type reserve_size = 128,
363 hasher
const& h = hasher(),
364 key_equal
const& k = key_equal()):
365 illegalkey(illegalkey),
366 numel(0),maxstash(stashsize),
367 data(NULL), datalen(0),
369 kranddist(0, CuckooK - 1), hashfun(h), keyeq(k), mask(reserve_size - 1) {
370 reserve(reserve_size);
374 illegalkey(other.illegalkey),
375 numel(0), maxstash(other.maxstash),
376 data(NULL), datalen(0),
377 drng(time(NULL)), kranddist(0, CuckooK - 1),
378 hashfun(other.hashfun), keyeq(other.keyeq), mask(0) {
384 const key_type& illegal_key()
const {
393 if (&other ==
this)
return *
this;
394 if (other.numel == 0 && numel == 0)
return *
this;
395 else if (other.numel == 0) {
396 for (
size_t i = 0;i < datalen; ++i) data[i] = illegalkey;
405 data = (map_container_type)malloc(
sizeof(value_type) * other.datalen);
406 datalen = other.datalen;
407 std::uninitialized_copy(other.data_begin(), other.data_end(), data_begin());
411 hashfun = other.hashfun;
418 index_type size()
const {
425 iter.in_stash =
false;
426 iter.vec_iter = data_begin();
428 while(iter.vec_iter != data_end() &&
429 keyeq(*(iter.vec_iter), illegalkey)) ++iter.vec_iter;
431 if (iter.vec_iter == data_end()) {
432 iter.in_stash =
true;
433 iter.stash_iter = stash.begin();
439 return iterator(
this, stash.end());
443 const_iterator begin()
const {
446 iter.in_stash =
false;
447 iter.vec_iter = data_begin();
449 while(iter.vec_iter != data_end() &&
450 keyeq(*(iter.vec_iter), illegalkey)) ++iter.vec_iter;
453 if (iter.vec_iter == data_end()) {
454 iter.in_stash =
true;
455 iter.stash_iter = stash.begin();
461 const_iterator end()
const {
462 return const_iterator(
this, stash.end());
470 static size_t mix(
size_t state) {
471 state += (state << 12);
472 state ^= (state >> 22);
473 state += (state << 4);
474 state ^= (state >> 9);
475 state += (state << 10);
476 state ^= (state >> 2);
477 state += (state << 7);
478 state ^= (state >> 12);
482 index_type compute_hash(
size_t k ,
const uint32_t
seed)
const {
484 #if (__SIZEOF_PTRDIFF_T__ == 8)
485 static const size_t a[8] = {0x6306AA9DFC13C8E7,
494 static const size_t a[8] = {0xFC13C8E7,
503 index_type s = mix(a[seed] ^ k);
508 if (numel == 0)
return;
509 stash_container_type stmp;
512 numel -= stmp.size();
513 for (
size_t i = 0;i < datalen; ++i) {
515 if (!keyeq(data[i], illegalkey)) {
516 if (count(data[i]))
continue;
517 non_const_value_type v = data[i];
518 replace_in_vector(data_begin() + i, illegalkey);
524 typename stash_container_type::const_iterator iter = stmp.begin();
525 while(iter != stmp.end()) {
531 static uint64_t next_powerof2(uint64_t val) {
533 val = val | (val >> 1);
534 val = val | (val >> 2);
535 val = val | (val >> 4);
536 val = val | (val >> 8);
537 val = val | (val >> 16);
538 val = val | (val >> 32);
543 void reserve(
size_t newlen) {
544 newlen = next_powerof2(newlen);
545 if (newlen <= datalen)
return;
550 data = (map_container_type)realloc(data, newlen *
sizeof(value_type));
551 std::uninitialized_fill(data_end(), data+newlen, non_const_value_type(illegalkey));
556 std::pair<iterator, bool> insert(
const value_type& v_) {
557 iterator i = find(v_);
558 if (i != end())
return std::make_pair(i,
false);
559 else return std::make_pair(do_insert(v_),
true);
562 iterator insert(const_iterator hint, value_type
const& v) {
563 return insert(v).first;
566 iterator find(key_type
const& k) {
567 size_t hash_of_k = hashfun(k);
568 for (uint32_t i = 0;i < CuckooK; ++i) {
569 index_type idx = compute_hash(hash_of_k, i);
570 if (keyeq(data[idx], k))
return iterator(
this, data_begin() + idx);
572 return iterator(
this, std::find(stash.begin(), stash.end(), k));
575 const_iterator find(key_type
const& k)
const {
576 size_t hash_of_k = hashfun(k);
577 for (uint32_t i = 0;i < CuckooK; ++i) {
578 index_type idx = compute_hash(hash_of_k, i);
579 if (keyeq(data[idx], k))
return const_iterator(
this, data_begin() + idx);
581 return const_iterator(
this, std::find(stash.begin(), stash.end(), k));
584 size_t count(key_type
const& k)
const {
585 size_t hash_of_k = hashfun(k);
586 for (uint32_t i = 0;i < CuckooK; ++i) {
587 index_type idx = compute_hash(hash_of_k, i);
588 if (keyeq(data[idx], k))
return 1;
590 for (
size_t i = 0; i < stash.size(); ++i) {
591 if (stash[i] == k)
return 1;
597 void erase(iterator iter) {
598 if (iter.in_stash ==
false) {
599 if (!keyeq(*(iter.vec_iter), illegalkey)) {
601 replace_in_vector(&(*(iter.vec_iter)), illegalkey);
606 else if (iter.stash_iter != stash.end()) {
608 stash.erase(iter.stash_iter);
612 void erase(key_type
const& k) {
613 iterator iter = find(k);
614 if (iter != end()) erase(iter);
618 std::swap(illegalkey, other.illegalkey);
619 std::swap(numel, other.numel);
620 std::swap(maxstash, other.maxstash);
621 std::swap(data, other.data);
622 std::swap(datalen, other.datalen);
623 std::swap(stash, other.stash);
624 std::swap(drng, other.drng);
625 std::swap(kranddist, other.kranddist);
626 std::swap(hashfun, other.hashfun);
627 std::swap(keyeq, other.keyeq);
628 std::swap(mask, other.mask);
631 key_equal key_eq()
const {
641 float load_factor()
const {
642 return (
float)numel / (datalen + stash.size());
646 oarc << size_t(numel);
652 for (
size_t i = 0;i < datalen; ++i) data[i] = illegalkey;
657 reserve(tmpnumel * 1.5);
659 deserialize_iterator<iarchive, non_const_value_type>
660 (iarc, insert_iterator(
this));
661 ASSERT_EQ(numel, tmpnumel);