1 #ifndef GRAPHLAB_UTIL_HOPSCOTCH_TABLE_HPP
2 #define GRAPHLAB_UTIL_HOPSCOTCH_TABLE_HPP
10 #include <graphlab/parallel/pthread_tools.hpp>
11 #include <graphlab/parallel/atomic.hpp>
13 #include <boost/functional/hash.hpp>
14 #define _HOPSCOTCH_TABLE_DEFAULT_HASH boost::hash<T>
42 bool Synchronized =
true,
43 typename Hash = _HOPSCOTCH_TABLE_DEFAULT_HASH,
44 typename KeyEqual = std::equal_to<T> >
// --- Core storage of the hopscotch table (intervening declarations elided) ---
49 typedef size_t size_type;
// element: one table slot. `hasdata` marks occupancy; `field` is presumably
// the 32-bit hopscotch neighborhood bitmap for entries hashing to this slot
// (bit i set => slot at offset i holds such an entry) -- see its use in
// find_impl/erase below; confirm against the full header.
69 element():hasdata(
false), field(0) { }
// Slot array; sized to next_powerof2(len) + 32 by the constructor so a full
// 32-wide neighborhood starting at the last addressable slot stays in bounds.
72 std::vector<element> data;
// When Synchronized, one spinlock per 32 consecutive slots (data.size()/32+1
// locks, see constructor); empty otherwise.
73 std::vector<simple_spinlock> locks;
// Rounds val up to the next power of two by smearing the highest set bit
// into every lower bit position. NOTE(review): the customary `--val`
// prologue (original line 82) and `return val + 1` epilogue are on elided
// lines and are not visible in this fragment -- verify they are present.
81 static uint64_t next_powerof2(uint64_t val) {
83 val = val | (val >> 1);
84 val = val | (val >> 2);
85 val = val | (val >> 4);
86 val = val | (val >> 8);
87 val = val | (val >> 16);
88 val = val | (val >> 32);
// Hashes d with the user hash functor, then post-mixes the result so that
// weak hashes (e.g. identity hashes of integers) still spread across the
// table. Fast path: a single hardware CRC32 instruction when available.
// Fallback (lines 104-111): an add/xor-shift avalanche mixer.
// Lines 99-103 and the final return are elided from this view.
95 size_t compute_hash(
const value_type& d)
const {
96 size_t state = hashfun(d);
97 #ifdef HAS_BUILTIN_CRC32
// SSE4.2 CRC32 over the 64-bit hash state; seed 0.
98 return __builtin_ia32_crc32di(0, state);
104 state += (state << 12);
105 state ^= (state >> 22);
106 state += (state << 4);
107 state ^= (state >> 9);
108 state += (state << 10);
109 state ^= (state >> 2);
110 state += (state << 7);
111 state ^= (state >> 12);
// Maps a slot index to the index of the spinlock stripe guarding it.
// Body elided; given locks is sized data.size()/32 + 1, this presumably
// returns idx / 32 -- TODO confirm against the full header.
118 static size_t associated_lock_id(
size_t idx) {
// Constructor initializer list (leading parameters, including `len`, are on
// elided lines). Sizing invariants:
//  - data: power-of-two capacity plus 32 spill slots so the 32-wide
//    hopscotch neighborhood of the last hashable slot is in bounds;
//  - locks: one spinlock per 32 slots, allocated only when Synchronized;
//  - mask: data.size() - 32 - 1, i.e. (power-of-two - 1), restricting hash
//    targets to the power-of-two prefix of the array.
134 Hash hashfun = Hash(),
135 KeyEqual equalfun = KeyEqual()):
136 data(next_powerof2(len) + 32),
137 locks(Synchronized ? data.
size() / 32 + 1 : 0),
141 mask(data.
size() - 32 - 1) {
// const_iterator fragment: a forward iterator that walks the slot array and
// skips unoccupied slots (see the !hasdata loop at line 175, presumably in
// operator++ -- the surrounding lines are elided).
160 typedef std::forward_iterator_tag iterator_category;
// NOTE(review): difference_type should be a signed type (std::ptrdiff_t);
// size_t here violates the iterator requirements. Cannot be fixed safely
// from this elided view.
162 typedef size_t difference_type;
163 typedef value_type* pointer;
164 typedef value_type& reference;
// Position within the owning table's slot vector.
169 typename std::vector<element>::const_iterator iter;
// Advance past empty slots until an occupied one (or end) is reached.
175 while(iter != ptr->data.end() && !iter->hasdata) {
188 reference operator*() {
192 pointer operator->() {
193 return &(iter->elem);
// Iterators are equal iff they belong to the same table and same slot.
197 return ptr == it.ptr && iter == it.iter;
201 return !((*this) == iter);
// Private constructor: only the table itself creates positioned iterators.
207 typename std::vector<element>::const_iterator iter):
208 ptr(table), iter(iter) { }
// Mutable iterator fragment; mirrors const_iterator above but over a
// non-const slot iterator. Intervening lines are elided.
219 typedef std::forward_iterator_tag iterator_category;
// NOTE(review): same issue as const_iterator -- difference_type should be
// signed (std::ptrdiff_t), not size_t.
221 typedef size_t difference_type;
222 typedef value_type* pointer;
223 typedef value_type& reference;
// Position within the owning table's slot vector.
228 typename std::vector<element>::iterator iter;
// Advance past empty slots until an occupied one (or end) is reached.
240 while(iter != ptr->data.end() && !iter->hasdata) {
253 reference operator*() {
257 pointer operator->() {
258 return &(iter->elem);
// NOTE(review): comparison operators take the iterator by value; `const
// iterator&` would avoid a copy (harmless here, just non-idiomatic).
261 bool operator==(
const iterator it)
const {
262 return ptr == it.ptr && iter == it.iter;
265 bool operator!=(
const iterator iter)
const {
266 return !((*this) == iter);
// Private constructor: only the table itself creates positioned iterators.
272 typename std::vector<element>::iterator iter):
273 ptr(table), iter(iter) { }
// (Line 283 belongs to another nested iterator type; its context is elided.)
283 typedef std::forward_iterator_tag iterator_category;
// try_find_and_overwrite<IsSynchronized>: if an equal key already exists in
// target's neighborhood, optionally overwrite its value in place. When
// IsSynchronized, takes the two adjacent lock stripes covering the 32-slot
// neighborhood -- acquired high-to-low (lockid+1 then lockid) and released
// low-to-high, consistently with get_sync below, which avoids deadlock
// between concurrent callers.
313 template <
bool IsSynchronized>
318 size_t lockid = associated_lock_id(target);
320 if (IsSynchronized) {
321 locks[lockid + 1].lock();
322 locks[lockid].lock();
324 iterator iter = find_impl(newdata, target);
// Overwrite only when the key was found AND the caller asked for it.
325 if (iter !=
end() && overwrite) {
326 iter.iter->elem = newdata;
328 if (IsSynchronized) {
329 locks[lockid].unlock();
330 locks[lockid + 1].unlock();
// insert_impl<IsSynchronized>: core hopscotch insertion.
//  1. If the key already exists, overwrite (if requested) and return.
//  2. Otherwise linear-probe forward from `target` for an empty slot,
//     bounded by target + 31*20 (a fixed probe budget).
//  3. Hopscotch-displace: repeatedly move the empty slot backwards toward
//     `target` by relocating an element from an earlier neighborhood into
//     it, until the hole is within 31 slots of target.
//  4. Install newdata in the hole and set the owner's neighborhood bit.
// Returns end() when no hole can be found/moved close enough (caller is
// then expected to resize -- not visible in this fragment).
// Many lines are elided from this view; comments below annotate only what
// is visible.
340 template <
bool IsSynchronized>
341 iterator insert_impl(
const value_type& newdata,
bool overwrite =
true) {
// mask restricts the hash target to the power-of-two prefix of data.
343 size_t target = compute_hash(newdata) & mask;
345 iterator ret = try_find_and_overwrite<IsSynchronized>(newdata,
// Key already present: done (overwritten or not per `overwrite`).
348 if (ret !=
end())
return ret;
352 size_t shift_target = target;
// Probe budget: at most 31*20 slots past target (and never past the end).
354 size_t limit = std::min(data.size(), target + 31 * 20);
356 for (;shift_target < limit; shift_target++) {
357 if (data[shift_target].hasdata ==
false) {
358 lockid = associated_lock_id(shift_target);
359 if (IsSynchronized) locks[lockid].lock();
// Re-check emptiness under the lock (it may have been filled since).
361 if (data[shift_target].hasdata ==
false) {
370 if (IsSynchronized) locks[lockid].unlock();
// No empty slot within the probe budget: signal failure via end().
377 return iterator(
this, data.end());
// Displacement loop: while the hole is too far (>= 31) from target, pull it
// closer by relocating an element whose own neighborhood still covers the
// hole.
381 while(shift_target - target >= 31) {
// The donor slot may be guarded by the previous lock stripe.
390 if (IsSynchronized && lockid > 0) locks[lockid - 1].lock();
// Scan candidate owner slots from farthest (30 back) to nearest.
392 for (
size_t i = 30; i >= 1; --i) {
394 if (data[shift_target - i].field) {
// r = offset of the owner's closest (first) resident entry.
395 r = __builtin_ctz(data[shift_target - i].field);
398 size_t new_shift_target = shift_target - i + r;
399 assert(data[new_shift_target].hasdata);
// Move that entry into the hole...
400 data[shift_target].elem = data[new_shift_target].elem;
401 data[shift_target].hasdata =
true;
402 data[new_shift_target].hasdata =
false;
// ...and clear the vacated element (drops any held resources).
403 data[new_shift_target].elem = T();
// Update the owner's bitmap: clear old offset r, set new offset i.
406 data[shift_target - i].field =
407 (data[shift_target - i].field & ~((uint32_t)1 << r))
408 | ((uint32_t)1 << i);
// The hole has moved to the donor's old position.
409 shift_target = new_shift_target;
// No element could be displaced into the hole: insertion fails.
418 if (IsSynchronized) {
419 locks[lockid].unlock();
420 if (lockid > 0) locks[lockid - 1].unlock();
422 return iterator(
this, data.end());
// The hole may have crossed into the previous lock stripe; keep only the
// stripe that now guards it.
426 if (IsSynchronized) {
429 size_t newlockid = associated_lock_id(shift_target);
430 assert(newlockid == lockid || newlockid == lockid - 1);
431 if (newlockid == lockid) {
432 if (lockid > 0) locks[lockid - 1].unlock();
434 else if (newlockid == lockid - 1) {
435 locks[lockid].unlock();
// Final install: lock the neighboring stripe too (target's bitmap may live
// in the previous stripe), write the element, set the owner's bit.
443 if (IsSynchronized && lockid > 0) locks[lockid - 1].lock();
444 data[shift_target].elem = newdata;
// NOTE(review): `1 <<` is a signed int shift, unlike the `(uint32_t)1 <<`
// used everywhere else. Harmless here (offset < 31 after the while loop,
// so at most 1<<30), but inconsistent -- consider (uint32_t)1 for uniformity.
445 data[target].field |= (1 << (shift_target - target));
446 data[shift_target].hasdata =
true;
447 if (IsSynchronized) {
449 if (lockid > 0) locks[lockid - 1].unlock();
450 locks[lockid].unlock();
455 return iterator(
this, data.begin() + shift_target);
// Const lookup within target's hopscotch neighborhood: walk the set bits of
// the neighborhood bitmap from lowest to highest via __builtin_ctz, probing
// each resident entry with the key-equality functor. Returns end() when no
// bit matches. (Loop header between lines 466 and 468 is elided.)
465 const_iterator find_impl(
const value_type& key,
size_t target)
const {
466 uint32_t field = data[target].field;
// Offset of the lowest remaining candidate slot.
468 int r = __builtin_ctz(field);
469 if (data[target + r].hasdata &&
470 key_eq()(data[target + r].elem, key)) {
471 return const_iterator(
this, data.begin() + target + r);
// Clear the tested bit and continue with the next candidate.
475 field &= ~(((uint32_t)1 << r));
478 return const_iterator(
this, data.end());
// Mutable lookup: delegates to the const overload (via a cast to const
// table) and rebuilds a mutable iterator at the same slot offset. Works for
// the end() case too, since end's offset maps back to data.end().
482 iterator find_impl(
const value_type& key,
size_t target) {
483 const_iterator iter = ((
const hopscotch_table*)(
this))->find_impl(key, target);
484 return iterator(
this, data.begin() + (iter.iter - data.begin()));
// Unsynchronized public wrappers (enclosing signatures elided):
// insert: overwrite semantics (insert_impl's overwrite defaults to true).
495 return insert_impl<false>(newdata);
// insert-if-absent: overwrite = false, existing entries are left untouched.
506 return insert_impl<false>(newdata,
false);
// find: hash the key into the power-of-two prefix, then neighborhood scan.
517 size_t target = compute_hash(key) & mask;
518 return find_impl(key, target);
// Rebuild a mutable iterator from a const one at the same slot offset.
528 return iterator(
this, data.begin() + (iter.iter - data.begin()));
// clear: mark every slot empty (bitmap/element resets presumably on elided
// lines -- confirm `field` is also cleared in the full source).
533 for (
size_t i = 0;i < data.size(); ++i) {
534 data[i].hasdata =
false;
// erase(iterator): remove the pointed-to entry.
545 if (iter.iter == data.end())
return false;
546 assert(iter.iter->hasdata);
// Recompute the owning slot from the stored element's hash...
547 size_t target = compute_hash(iter.iter->elem) & mask;
// ...and the entry's offset within that owner's neighborhood.
548 size_t offset = iter.iter - (data.begin() + target);
551 iter.iter->hasdata =
false;
// Clear the owner's neighborhood bit for this offset.
553 data[target].field &= ~((uint32_t)1 << offset);
// begin(): skip leading empty slots to the first occupied one (or end).
565 typename std::vector<element>::iterator iter = data.begin();
566 while (iter != data.end() && !iter->hasdata) {
// Const begin() counterpart -- same skip-empty scan.
575 typename std::vector<element>::iterator iter = data.begin();
576 while (iter != data.end() && !iter->hasdata) {
// Occupancy ratio; bodies elided.
612 float load_factor()
const {
// Synchronized variant of load_factor; body elided.
628 float load_factor_sync()
const {
// Assignment fragment: locks are resized (spinlocks are not copied --
// presumably default-constructed unlocked), functors are copied.
634 locks.resize(other.locks.size());
635 hashfun = other.hashfun;
636 equalfun = other.equalfun;
// put: insert-with-overwrite; true iff the insertion found/made a slot
// (end() signals a full neighborhood -- see insert_impl).
649 return insert_impl<Synchronized>(t).iter != data.end();
// put-if-absent variant: overwrite = false.
659 return insert_impl<Synchronized>(t,
false).iter != data.end();
// get (unsynchronized): copy the slot, then validate the copy. Returns
// (true, value) on a hit, (false, T()) on a miss.
674 element e = *(iter.iter);
675 if (e.hasdata &&
key_eq()(e.elem, t)) {
676 return std::make_pair(
true, e.elem);
681 return std::make_pair(
false, T());
// get_sync: locked lookup. Takes the two stripes covering target's
// neighborhood (high-to-low, matching try_find_and_overwrite), copies the
// element out under the locks, then releases before validating/returning --
// so the returned value is a consistent snapshot.
685 size_t target = compute_hash(t) & mask;
686 size_t lockid = associated_lock_id(target);
687 locks[lockid + 1].lock();
688 locks[lockid].lock();
689 iter = find_impl(t, target);
// Copy under lock; everything after works on the snapshot `e`.
691 element e = *(iter.iter);
692 locks[lockid].unlock();
693 locks[lockid + 1].unlock();
694 assert(e.hasdata &&
key_eq()(e.elem, t));
695 return std::make_pair(
true, e.elem);
// Miss path: release both stripes and report not-found.
698 locks[lockid].unlock();
699 locks[lockid + 1].unlock();
700 return std::make_pair(
false, T());
// erase_sync: locked removal. Locks the two stripes covering target's
// neighborhood, locates the entry (find on an elided line), and on a hit
// clears the slot's occupancy flag and the owner's neighborhood bit before
// unlocking. Early unlock pair (lines 721-722) is the not-found path.
714 size_t target = compute_hash(t) & mask;
715 size_t lockid = associated_lock_id(target);
717 locks[lockid + 1].lock();
718 locks[lockid].lock();
// Not found: release and return (return statement on an elided line).
721 locks[lockid].unlock();
722 locks[lockid + 1].unlock();
726 assert(iter.iter->hasdata);
// Entry's offset within the owner's 32-slot neighborhood.
727 size_t offset = iter.iter - (data.begin() + target);
730 iter.iter->hasdata =
false;
732 data[target].field &= ~((uint32_t)1 << offset);
733 locks[lockid + 1].unlock();
734 locks[lockid].unlock();