LLVM API Documentation
00001 //===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the SparseSet class derived from the version described in 00011 // Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters 00012 // on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993. 00013 // 00014 // A sparse set holds a small number of objects identified by integer keys from 00015 // a moderately sized universe. The sparse set uses more memory than other 00016 // containers in order to provide faster operations. 00017 // 00018 //===----------------------------------------------------------------------===// 00019 00020 #ifndef LLVM_ADT_SPARSESET_H 00021 #define LLVM_ADT_SPARSESET_H 00022 00023 #include "llvm/ADT/STLExtras.h" 00024 #include "llvm/ADT/SmallVector.h" 00025 #include "llvm/Support/DataTypes.h" 00026 #include <limits> 00027 00028 namespace llvm { 00029 00030 /// SparseSetValTraits - Objects in a SparseSet are identified by keys that can 00031 /// be uniquely converted to a small integer less than the set's universe. This 00032 /// class allows the set to hold values that differ from the set's key type as 00033 /// long as an index can still be derived from the value. SparseSet never 00034 /// directly compares ValueT, only their indices, so it can map keys to 00035 /// arbitrary values. SparseSetValTraits computes the index from the value 00036 /// object. To compute the index from a key, SparseSet uses a separate 00037 /// KeyFunctorT template argument. 00038 /// 00039 /// A simple type declaration, SparseSet<Type>, handles these cases: 00040 /// - unsigned key, identity index, identity value 00041 /// - unsigned key, identity index, fat value providing getSparseSetIndex() 00042 /// 00043 /// The type declaration SparseSet<Type, UnaryFunction> handles: 00044 /// - unsigned key, remapped index, identity value (virtual registers) 00045 /// - pointer key, pointer-derived index, identity value (node+ID) 00046 /// - pointer key, pointer-derived index, fat value with getSparseSetIndex() 00047 /// 00048 /// Only other, unexpected cases require specializing SparseSetValTraits. 00049 /// 00050 /// For best results, ValueT should not require a destructor. 00051 /// 00052 template<typename ValueT> 00053 struct SparseSetValTraits { 00054 static unsigned getValIndex(const ValueT &Val) { 00055 return Val.getSparseSetIndex(); 00056 } 00057 }; 00058 00059 /// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The 00060 /// generic implementation handles ValueT classes which either provide 00061 /// getSparseSetIndex() or specialize SparseSetValTraits<>. 00062 /// 00063 template<typename KeyT, typename ValueT, typename KeyFunctorT> 00064 struct SparseSetValFunctor { 00065 unsigned operator()(const ValueT &Val) const { 00066 return SparseSetValTraits<ValueT>::getValIndex(Val); 00067 } 00068 }; 00069 00070 /// SparseSetValFunctor<KeyT, KeyT> - Helper class for the common case of 00071 /// identity key/value sets. 00072 template<typename KeyT, typename KeyFunctorT> 00073 struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> { 00074 unsigned operator()(const KeyT &Key) const { 00075 return KeyFunctorT()(Key); 00076 } 00077 }; 00078 00079 /// SparseSet - Fast set implmentation for objects that can be identified by 00080 /// small unsigned keys. 00081 /// 00082 /// SparseSet allocates memory proportional to the size of the key universe, so 00083 /// it is not recommended for building composite data structures. It is useful 00084 /// for algorithms that require a single set with fast operations. 00085 /// 00086 /// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast 00087 /// clear() and iteration as fast as a vector. The find(), insert(), and 00088 /// erase() operations are all constant time, and typically faster than a hash 00089 /// table. The iteration order doesn't depend on numerical key values, it only 00090 /// depends on the order of insert() and erase() operations. When no elements 00091 /// have been erased, the iteration order is the insertion order. 00092 /// 00093 /// Compared to BitVector, SparseSet<unsigned> uses 8x-40x more memory, but 00094 /// offers constant-time clear() and size() operations as well as fast 00095 /// iteration independent on the size of the universe. 00096 /// 00097 /// SparseSet contains a dense vector holding all the objects and a sparse 00098 /// array holding indexes into the dense vector. Most of the memory is used by 00099 /// the sparse array which is the size of the key universe. The SparseT 00100 /// template parameter provides a space/speed tradeoff for sets holding many 00101 /// elements. 00102 /// 00103 /// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse 00104 /// array uses 4 x Universe bytes. 00105 /// 00106 /// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache 00107 /// lines, but the sparse array is 4x smaller. N is the number of elements in 00108 /// the set. 00109 /// 00110 /// For sets that may grow to thousands of elements, SparseT should be set to 00111 /// uint16_t or uint32_t. 00112 /// 00113 /// @tparam ValueT The type of objects in the set. 00114 /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. 00115 /// @tparam SparseT An unsigned integer type. See above. 00116 /// 00117 template<typename ValueT, 00118 typename KeyFunctorT = llvm::identity<unsigned>, 00119 typename SparseT = uint8_t> 00120 class SparseSet { 00121 static_assert(std::numeric_limits<SparseT>::is_integer && 00122 !std::numeric_limits<SparseT>::is_signed, 00123 "SparseT must be an unsigned integer type"); 00124 00125 typedef typename KeyFunctorT::argument_type KeyT; 00126 typedef SmallVector<ValueT, 8> DenseT; 00127 typedef unsigned size_type; 00128 DenseT Dense; 00129 SparseT *Sparse; 00130 unsigned Universe; 00131 KeyFunctorT KeyIndexOf; 00132 SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf; 00133 00134 // Disable copy construction and assignment. 00135 // This data structure is not meant to be used that way. 00136 SparseSet(const SparseSet&) LLVM_DELETED_FUNCTION; 00137 SparseSet &operator=(const SparseSet&) LLVM_DELETED_FUNCTION; 00138 00139 public: 00140 typedef ValueT value_type; 00141 typedef ValueT &reference; 00142 typedef const ValueT &const_reference; 00143 typedef ValueT *pointer; 00144 typedef const ValueT *const_pointer; 00145 00146 SparseSet() : Sparse(nullptr), Universe(0) {} 00147 ~SparseSet() { free(Sparse); } 00148 00149 /// setUniverse - Set the universe size which determines the largest key the 00150 /// set can hold. The universe must be sized before any elements can be 00151 /// added. 00152 /// 00153 /// @param U Universe size. All object keys must be less than U. 00154 /// 00155 void setUniverse(unsigned U) { 00156 // It's not hard to resize the universe on a non-empty set, but it doesn't 00157 // seem like a likely use case, so we can add that code when we need it. 00158 assert(empty() && "Can only resize universe on an empty map"); 00159 // Hysteresis prevents needless reallocations. 00160 if (U >= Universe/4 && U <= Universe) 00161 return; 00162 free(Sparse); 00163 // The Sparse array doesn't actually need to be initialized, so malloc 00164 // would be enough here, but that will cause tools like valgrind to 00165 // complain about branching on uninitialized data. 00166 Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT))); 00167 Universe = U; 00168 } 00169 00170 // Import trivial vector stuff from DenseT. 00171 typedef typename DenseT::iterator iterator; 00172 typedef typename DenseT::const_iterator const_iterator; 00173 00174 const_iterator begin() const { return Dense.begin(); } 00175 const_iterator end() const { return Dense.end(); } 00176 iterator begin() { return Dense.begin(); } 00177 iterator end() { return Dense.end(); } 00178 00179 /// empty - Returns true if the set is empty. 00180 /// 00181 /// This is not the same as BitVector::empty(). 00182 /// 00183 bool empty() const { return Dense.empty(); } 00184 00185 /// size - Returns the number of elements in the set. 00186 /// 00187 /// This is not the same as BitVector::size() which returns the size of the 00188 /// universe. 00189 /// 00190 size_type size() const { return Dense.size(); } 00191 00192 /// clear - Clears the set. This is a very fast constant time operation. 00193 /// 00194 void clear() { 00195 // Sparse does not need to be cleared, see find(). 00196 Dense.clear(); 00197 } 00198 00199 /// findIndex - Find an element by its index. 00200 /// 00201 /// @param Idx A valid index to find. 00202 /// @returns An iterator to the element identified by key, or end(). 00203 /// 00204 iterator findIndex(unsigned Idx) { 00205 assert(Idx < Universe && "Key out of range"); 00206 const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u; 00207 for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) { 00208 const unsigned FoundIdx = ValIndexOf(Dense[i]); 00209 assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?"); 00210 if (Idx == FoundIdx) 00211 return begin() + i; 00212 // Stride is 0 when SparseT >= unsigned. We don't need to loop. 00213 if (!Stride) 00214 break; 00215 } 00216 return end(); 00217 } 00218 00219 /// find - Find an element by its key. 00220 /// 00221 /// @param Key A valid key to find. 00222 /// @returns An iterator to the element identified by key, or end(). 00223 /// 00224 iterator find(const KeyT &Key) { 00225 return findIndex(KeyIndexOf(Key)); 00226 } 00227 00228 const_iterator find(const KeyT &Key) const { 00229 return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key)); 00230 } 00231 00232 /// count - Returns 1 if this set contains an element identified by Key, 00233 /// 0 otherwise. 00234 /// 00235 size_type count(const KeyT &Key) const { 00236 return find(Key) == end() ? 0 : 1; 00237 } 00238 00239 /// insert - Attempts to insert a new element. 00240 /// 00241 /// If Val is successfully inserted, return (I, true), where I is an iterator 00242 /// pointing to the newly inserted element. 00243 /// 00244 /// If the set already contains an element with the same key as Val, return 00245 /// (I, false), where I is an iterator pointing to the existing element. 00246 /// 00247 /// Insertion invalidates all iterators. 00248 /// 00249 std::pair<iterator, bool> insert(const ValueT &Val) { 00250 unsigned Idx = ValIndexOf(Val); 00251 iterator I = findIndex(Idx); 00252 if (I != end()) 00253 return std::make_pair(I, false); 00254 Sparse[Idx] = size(); 00255 Dense.push_back(Val); 00256 return std::make_pair(end() - 1, true); 00257 } 00258 00259 /// array subscript - If an element already exists with this key, return it. 00260 /// Otherwise, automatically construct a new value from Key, insert it, 00261 /// and return the newly inserted element. 00262 ValueT &operator[](const KeyT &Key) { 00263 return *insert(ValueT(Key)).first; 00264 } 00265 00266 /// erase - Erases an existing element identified by a valid iterator. 00267 /// 00268 /// This invalidates all iterators, but erase() returns an iterator pointing 00269 /// to the next element. This makes it possible to erase selected elements 00270 /// while iterating over the set: 00271 /// 00272 /// for (SparseSet::iterator I = Set.begin(); I != Set.end();) 00273 /// if (test(*I)) 00274 /// I = Set.erase(I); 00275 /// else 00276 /// ++I; 00277 /// 00278 /// Note that end() changes when elements are erased, unlike std::list. 00279 /// 00280 iterator erase(iterator I) { 00281 assert(unsigned(I - begin()) < size() && "Invalid iterator"); 00282 if (I != end() - 1) { 00283 *I = Dense.back(); 00284 unsigned BackIdx = ValIndexOf(Dense.back()); 00285 assert(BackIdx < Universe && "Invalid key in set. Did object mutate?"); 00286 Sparse[BackIdx] = I - begin(); 00287 } 00288 // This depends on SmallVector::pop_back() not invalidating iterators. 00289 // std::vector::pop_back() doesn't give that guarantee. 00290 Dense.pop_back(); 00291 return I; 00292 } 00293 00294 /// erase - Erases an element identified by Key, if it exists. 00295 /// 00296 /// @param Key The key identifying the element to erase. 00297 /// @returns True when an element was erased, false if no element was found. 00298 /// 00299 bool erase(const KeyT &Key) { 00300 iterator I = find(Key); 00301 if (I == end()) 00302 return false; 00303 erase(I); 00304 return true; 00305 } 00306 00307 }; 00308 00309 } // end namespace llvm 00310 00311 #endif