LLVM API Documentation
00001 //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements a hash set that can be used to remove duplication of 00011 // nodes in a graph. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/ADT/FoldingSet.h" 00016 #include "llvm/ADT/Hashing.h" 00017 #include "llvm/Support/Allocator.h" 00018 #include "llvm/Support/ErrorHandling.h" 00019 #include "llvm/Support/Host.h" 00020 #include "llvm/Support/MathExtras.h" 00021 #include <cassert> 00022 #include <cstring> 00023 using namespace llvm; 00024 00025 //===----------------------------------------------------------------------===// 00026 // FoldingSetNodeIDRef Implementation 00027 00028 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 00029 /// used to lookup the node in the FoldingSetImpl. 00030 unsigned FoldingSetNodeIDRef::ComputeHash() const { 00031 return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); 00032 } 00033 00034 bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 00035 if (Size != RHS.Size) return false; 00036 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 00037 } 00038 00039 /// Used to compare the "ordering" of two nodes as defined by the 00040 /// profiled bits and their ordering defined by memcmp(). 00041 bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { 00042 if (Size != RHS.Size) 00043 return Size < RHS.Size; 00044 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; 00045 } 00046 00047 //===----------------------------------------------------------------------===// 00048 // FoldingSetNodeID Implementation 00049 00050 /// Add* - Add various data types to Bit data. 00051 /// 00052 void FoldingSetNodeID::AddPointer(const void *Ptr) { 00053 // Note: this adds pointers to the hash using sizes and endianness that 00054 // depend on the host. It doesn't matter however, because hashing on 00055 // pointer values in inherently unstable. Nothing should depend on the 00056 // ordering of nodes in the folding set. 00057 Bits.append(reinterpret_cast<unsigned *>(&Ptr), 00058 reinterpret_cast<unsigned *>(&Ptr+1)); 00059 } 00060 void FoldingSetNodeID::AddInteger(signed I) { 00061 Bits.push_back(I); 00062 } 00063 void FoldingSetNodeID::AddInteger(unsigned I) { 00064 Bits.push_back(I); 00065 } 00066 void FoldingSetNodeID::AddInteger(long I) { 00067 AddInteger((unsigned long)I); 00068 } 00069 void FoldingSetNodeID::AddInteger(unsigned long I) { 00070 if (sizeof(long) == sizeof(int)) 00071 AddInteger(unsigned(I)); 00072 else if (sizeof(long) == sizeof(long long)) { 00073 AddInteger((unsigned long long)I); 00074 } else { 00075 llvm_unreachable("unexpected sizeof(long)"); 00076 } 00077 } 00078 void FoldingSetNodeID::AddInteger(long long I) { 00079 AddInteger((unsigned long long)I); 00080 } 00081 void FoldingSetNodeID::AddInteger(unsigned long long I) { 00082 AddInteger(unsigned(I)); 00083 if ((uint64_t)(unsigned)I != I) 00084 Bits.push_back(unsigned(I >> 32)); 00085 } 00086 00087 void FoldingSetNodeID::AddString(StringRef String) { 00088 unsigned Size = String.size(); 00089 Bits.push_back(Size); 00090 if (!Size) return; 00091 00092 unsigned Units = Size / 4; 00093 unsigned Pos = 0; 00094 const unsigned *Base = (const unsigned*) String.data(); 00095 00096 // If the string is aligned do a bulk transfer. 00097 if (!((intptr_t)Base & 3)) { 00098 Bits.append(Base, Base + Units); 00099 Pos = (Units + 1) * 4; 00100 } else { 00101 // Otherwise do it the hard way. 00102 // To be compatible with above bulk transfer, we need to take endianness 00103 // into account. 00104 if (sys::IsBigEndianHost) { 00105 for (Pos += 4; Pos <= Size; Pos += 4) { 00106 unsigned V = ((unsigned char)String[Pos - 4] << 24) | 00107 ((unsigned char)String[Pos - 3] << 16) | 00108 ((unsigned char)String[Pos - 2] << 8) | 00109 (unsigned char)String[Pos - 1]; 00110 Bits.push_back(V); 00111 } 00112 } else { 00113 assert(sys::IsLittleEndianHost && "Unexpected host endianness"); 00114 for (Pos += 4; Pos <= Size; Pos += 4) { 00115 unsigned V = ((unsigned char)String[Pos - 1] << 24) | 00116 ((unsigned char)String[Pos - 2] << 16) | 00117 ((unsigned char)String[Pos - 3] << 8) | 00118 (unsigned char)String[Pos - 4]; 00119 Bits.push_back(V); 00120 } 00121 } 00122 } 00123 00124 // With the leftover bits. 00125 unsigned V = 0; 00126 // Pos will have overshot size by 4 - #bytes left over. 00127 // No need to take endianness into account here - this is always executed. 00128 switch (Pos - Size) { 00129 case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 00130 case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 00131 case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 00132 default: return; // Nothing left. 00133 } 00134 00135 Bits.push_back(V); 00136 } 00137 00138 // AddNodeID - Adds the Bit data of another ID to *this. 00139 void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { 00140 Bits.append(ID.Bits.begin(), ID.Bits.end()); 00141 } 00142 00143 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 00144 /// lookup the node in the FoldingSetImpl. 00145 unsigned FoldingSetNodeID::ComputeHash() const { 00146 return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 00147 } 00148 00149 /// operator== - Used to compare two nodes to each other. 00150 /// 00151 bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { 00152 return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 00153 } 00154 00155 /// operator== - Used to compare two nodes to each other. 00156 /// 00157 bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 00158 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 00159 } 00160 00161 /// Used to compare the "ordering" of two nodes as defined by the 00162 /// profiled bits and their ordering defined by memcmp(). 00163 bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { 00164 return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 00165 } 00166 00167 bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { 00168 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; 00169 } 00170 00171 /// Intern - Copy this node's data to a memory region allocated from the 00172 /// given allocator and return a FoldingSetNodeIDRef describing the 00173 /// interned data. 00174 FoldingSetNodeIDRef 00175 FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 00176 unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 00177 std::uninitialized_copy(Bits.begin(), Bits.end(), New); 00178 return FoldingSetNodeIDRef(New, Bits.size()); 00179 } 00180 00181 //===----------------------------------------------------------------------===// 00182 /// Helper functions for FoldingSetImpl. 00183 00184 /// GetNextPtr - In order to save space, each bucket is a 00185 /// singly-linked-list. In order to make deletion more efficient, we make 00186 /// the list circular, so we can delete a node without computing its hash. 00187 /// The problem with this is that the start of the hash buckets are not 00188 /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 00189 /// use GetBucketPtr when this happens. 00190 static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 00191 // The low bit is set if this is the pointer back to the bucket. 00192 if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 00193 return nullptr; 00194 00195 return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 00196 } 00197 00198 00199 /// testing. 00200 static void **GetBucketPtr(void *NextInBucketPtr) { 00201 intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 00202 assert((Ptr & 1) && "Not a bucket pointer"); 00203 return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 00204 } 00205 00206 /// GetBucketFor - Hash the specified node ID and return the hash bucket for 00207 /// the specified ID. 00208 static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 00209 // NumBuckets is always a power of 2. 00210 unsigned BucketNum = Hash & (NumBuckets-1); 00211 return Buckets + BucketNum; 00212 } 00213 00214 /// AllocateBuckets - Allocated initialized bucket memory. 00215 static void **AllocateBuckets(unsigned NumBuckets) { 00216 void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); 00217 // Set the very last bucket to be a non-null "pointer". 00218 Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 00219 return Buckets; 00220 } 00221 00222 //===----------------------------------------------------------------------===// 00223 // FoldingSetImpl Implementation 00224 00225 FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 00226 assert(5 < Log2InitSize && Log2InitSize < 32 && 00227 "Initial hash table size out of range"); 00228 NumBuckets = 1 << Log2InitSize; 00229 Buckets = AllocateBuckets(NumBuckets); 00230 NumNodes = 0; 00231 } 00232 FoldingSetImpl::~FoldingSetImpl() { 00233 free(Buckets); 00234 } 00235 void FoldingSetImpl::clear() { 00236 // Set all but the last bucket to null pointers. 00237 memset(Buckets, 0, NumBuckets*sizeof(void*)); 00238 00239 // Set the very last bucket to be a non-null "pointer". 00240 Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 00241 00242 // Reset the node count to zero. 00243 NumNodes = 0; 00244 } 00245 00246 /// GrowHashTable - Double the size of the hash table and rehash everything. 00247 /// 00248 void FoldingSetImpl::GrowHashTable() { 00249 void **OldBuckets = Buckets; 00250 unsigned OldNumBuckets = NumBuckets; 00251 NumBuckets <<= 1; 00252 00253 // Clear out new buckets. 00254 Buckets = AllocateBuckets(NumBuckets); 00255 NumNodes = 0; 00256 00257 // Walk the old buckets, rehashing nodes into their new place. 00258 FoldingSetNodeID TempID; 00259 for (unsigned i = 0; i != OldNumBuckets; ++i) { 00260 void *Probe = OldBuckets[i]; 00261 if (!Probe) continue; 00262 while (Node *NodeInBucket = GetNextPtr(Probe)) { 00263 // Figure out the next link, remove NodeInBucket from the old link. 00264 Probe = NodeInBucket->getNextInBucket(); 00265 NodeInBucket->SetNextInBucket(nullptr); 00266 00267 // Insert the node into the new bucket, after recomputing the hash. 00268 InsertNode(NodeInBucket, 00269 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), 00270 Buckets, NumBuckets)); 00271 TempID.clear(); 00272 } 00273 } 00274 00275 free(OldBuckets); 00276 } 00277 00278 /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 00279 /// return it. If not, return the insertion token that will make insertion 00280 /// faster. 00281 FoldingSetImpl::Node 00282 *FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 00283 void *&InsertPos) { 00284 unsigned IDHash = ID.ComputeHash(); 00285 void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); 00286 void *Probe = *Bucket; 00287 00288 InsertPos = nullptr; 00289 00290 FoldingSetNodeID TempID; 00291 while (Node *NodeInBucket = GetNextPtr(Probe)) { 00292 if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) 00293 return NodeInBucket; 00294 TempID.clear(); 00295 00296 Probe = NodeInBucket->getNextInBucket(); 00297 } 00298 00299 // Didn't find the node, return null with the bucket as the InsertPos. 00300 InsertPos = Bucket; 00301 return nullptr; 00302 } 00303 00304 /// InsertNode - Insert the specified node into the folding set, knowing that it 00305 /// is not already in the map. InsertPos must be obtained from 00306 /// FindNodeOrInsertPos. 00307 void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 00308 assert(!N->getNextInBucket()); 00309 // Do we need to grow the hashtable? 00310 if (NumNodes+1 > NumBuckets*2) { 00311 GrowHashTable(); 00312 FoldingSetNodeID TempID; 00313 InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); 00314 } 00315 00316 ++NumNodes; 00317 00318 /// The insert position is actually a bucket pointer. 00319 void **Bucket = static_cast<void**>(InsertPos); 00320 00321 void *Next = *Bucket; 00322 00323 // If this is the first insertion into this bucket, its next pointer will be 00324 // null. Pretend as if it pointed to itself, setting the low bit to indicate 00325 // that it is a pointer to the bucket. 00326 if (!Next) 00327 Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 00328 00329 // Set the node's next pointer, and make the bucket point to the node. 00330 N->SetNextInBucket(Next); 00331 *Bucket = N; 00332 } 00333 00334 /// RemoveNode - Remove a node from the folding set, returning true if one was 00335 /// removed or false if the node was not in the folding set. 00336 bool FoldingSetImpl::RemoveNode(Node *N) { 00337 // Because each bucket is a circular list, we don't need to compute N's hash 00338 // to remove it. 00339 void *Ptr = N->getNextInBucket(); 00340 if (!Ptr) return false; // Not in folding set. 00341 00342 --NumNodes; 00343 N->SetNextInBucket(nullptr); 00344 00345 // Remember what N originally pointed to, either a bucket or another node. 00346 void *NodeNextPtr = Ptr; 00347 00348 // Chase around the list until we find the node (or bucket) which points to N. 00349 while (true) { 00350 if (Node *NodeInBucket = GetNextPtr(Ptr)) { 00351 // Advance pointer. 00352 Ptr = NodeInBucket->getNextInBucket(); 00353 00354 // We found a node that points to N, change it to point to N's next node, 00355 // removing N from the list. 00356 if (Ptr == N) { 00357 NodeInBucket->SetNextInBucket(NodeNextPtr); 00358 return true; 00359 } 00360 } else { 00361 void **Bucket = GetBucketPtr(Ptr); 00362 Ptr = *Bucket; 00363 00364 // If we found that the bucket points to N, update the bucket to point to 00365 // whatever is next. 00366 if (Ptr == N) { 00367 *Bucket = NodeNextPtr; 00368 return true; 00369 } 00370 } 00371 } 00372 } 00373 00374 /// GetOrInsertNode - If there is an existing simple Node exactly 00375 /// equal to the specified node, return it. Otherwise, insert 'N' and it 00376 /// instead. 00377 FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 00378 FoldingSetNodeID ID; 00379 GetNodeProfile(N, ID); 00380 void *IP; 00381 if (Node *E = FindNodeOrInsertPos(ID, IP)) 00382 return E; 00383 InsertNode(N, IP); 00384 return N; 00385 } 00386 00387 //===----------------------------------------------------------------------===// 00388 // FoldingSetIteratorImpl Implementation 00389 00390 FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 00391 // Skip to the first non-null non-self-cycle bucket. 00392 while (*Bucket != reinterpret_cast<void*>(-1) && 00393 (!*Bucket || !GetNextPtr(*Bucket))) 00394 ++Bucket; 00395 00396 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 00397 } 00398 00399 void FoldingSetIteratorImpl::advance() { 00400 // If there is another link within this bucket, go to it. 00401 void *Probe = NodePtr->getNextInBucket(); 00402 00403 if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 00404 NodePtr = NextNodeInBucket; 00405 else { 00406 // Otherwise, this is the last link in this bucket. 00407 void **Bucket = GetBucketPtr(Probe); 00408 00409 // Skip to the next non-null non-self-cycle bucket. 00410 do { 00411 ++Bucket; 00412 } while (*Bucket != reinterpret_cast<void*>(-1) && 00413 (!*Bucket || !GetNextPtr(*Bucket))); 00414 00415 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 00416 } 00417 } 00418 00419 //===----------------------------------------------------------------------===// 00420 // FoldingSetBucketIteratorImpl Implementation 00421 00422 FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 00423 Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; 00424 }