LLVM API Documentation

StringMap.cpp
Go to the documentation of this file.
00001 //===--- StringMap.cpp - String Hash table map implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the StringMap class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/ADT/StringMap.h"
00015 #include "llvm/ADT/StringExtras.h"
00016 #include "llvm/Support/Compiler.h"
00017 #include <cassert>
00018 using namespace llvm;
00019 
00020 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
00021   ItemSize = itemSize;
00022   
00023   // If a size is specified, initialize the table with that many buckets.
00024   if (InitSize) {
00025     init(InitSize);
00026     return;
00027   }
00028   
00029   // Otherwise, initialize it with zero buckets to avoid the allocation.
00030   TheTable = nullptr;
00031   NumBuckets = 0;
00032   NumItems = 0;
00033   NumTombstones = 0;
00034 }
00035 
00036 void StringMapImpl::init(unsigned InitSize) {
00037   assert((InitSize & (InitSize-1)) == 0 &&
00038          "Init Size must be a power of 2 or zero!");
00039   NumBuckets = InitSize ? InitSize : 16;
00040   NumItems = 0;
00041   NumTombstones = 0;
00042   
00043   TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
00044                                            sizeof(StringMapEntryBase **) +
00045                                            sizeof(unsigned));
00046 
00047   // Allocate one extra bucket, set it to look filled so the iterators stop at
00048   // end.
00049   TheTable[NumBuckets] = (StringMapEntryBase*)2;
00050 }
00051 
00052 
00053 /// LookupBucketFor - Look up the bucket that the specified string should end
00054 /// up in.  If it already exists as a key in the map, the Item pointer for the
00055 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
00056 /// case, the FullHashValue field of the bucket will be set to the hash value
00057 /// of the string.
00058 unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
00059   unsigned HTSize = NumBuckets;
00060   if (HTSize == 0) {  // Hash table unallocated so far?
00061     init(16);
00062     HTSize = NumBuckets;
00063   }
00064   unsigned FullHashValue = HashString(Name);
00065   unsigned BucketNo = FullHashValue & (HTSize-1);
00066   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
00067 
00068   unsigned ProbeAmt = 1;
00069   int FirstTombstone = -1;
00070   while (1) {
00071     StringMapEntryBase *BucketItem = TheTable[BucketNo];
00072     // If we found an empty bucket, this key isn't in the table yet, return it.
00073     if (LLVM_LIKELY(!BucketItem)) {
00074       // If we found a tombstone, we want to reuse the tombstone instead of an
00075       // empty bucket.  This reduces probing.
00076       if (FirstTombstone != -1) {
00077         HashTable[FirstTombstone] = FullHashValue;
00078         return FirstTombstone;
00079       }
00080       
00081       HashTable[BucketNo] = FullHashValue;
00082       return BucketNo;
00083     }
00084     
00085     if (BucketItem == getTombstoneVal()) {
00086       // Skip over tombstones.  However, remember the first one we see.
00087       if (FirstTombstone == -1) FirstTombstone = BucketNo;
00088     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
00089       // If the full hash value matches, check deeply for a match.  The common
00090       // case here is that we are only looking at the buckets (for item info
00091       // being non-null and for the full hash value) not at the items.  This
00092       // is important for cache locality.
00093       
00094       // Do the comparison like this because Name isn't necessarily
00095       // null-terminated!
00096       char *ItemStr = (char*)BucketItem+ItemSize;
00097       if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
00098         // We found a match!
00099         return BucketNo;
00100       }
00101     }
00102     
00103     // Okay, we didn't find the item.  Probe to the next bucket.
00104     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
00105     
00106     // Use quadratic probing, it has fewer clumping artifacts than linear
00107     // probing and has good cache behavior in the common case.
00108     ++ProbeAmt;
00109   }
00110 }
00111 
00112 
00113 /// FindKey - Look up the bucket that contains the specified key. If it exists
00114 /// in the map, return the bucket number of the key.  Otherwise return -1.
00115 /// This does not modify the map.
00116 int StringMapImpl::FindKey(StringRef Key) const {
00117   unsigned HTSize = NumBuckets;
00118   if (HTSize == 0) return -1;  // Really empty table?
00119   unsigned FullHashValue = HashString(Key);
00120   unsigned BucketNo = FullHashValue & (HTSize-1);
00121   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
00122 
00123   unsigned ProbeAmt = 1;
00124   while (1) {
00125     StringMapEntryBase *BucketItem = TheTable[BucketNo];
00126     // If we found an empty bucket, this key isn't in the table yet, return.
00127     if (LLVM_LIKELY(!BucketItem))
00128       return -1;
00129     
00130     if (BucketItem == getTombstoneVal()) {
00131       // Ignore tombstones.
00132     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
00133       // If the full hash value matches, check deeply for a match.  The common
00134       // case here is that we are only looking at the buckets (for item info
00135       // being non-null and for the full hash value) not at the items.  This
00136       // is important for cache locality.
00137       
00138       // Do the comparison like this because NameStart isn't necessarily
00139       // null-terminated!
00140       char *ItemStr = (char*)BucketItem+ItemSize;
00141       if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
00142         // We found a match!
00143         return BucketNo;
00144       }
00145     }
00146     
00147     // Okay, we didn't find the item.  Probe to the next bucket.
00148     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
00149     
00150     // Use quadratic probing, it has fewer clumping artifacts than linear
00151     // probing and has good cache behavior in the common case.
00152     ++ProbeAmt;
00153   }
00154 }
00155 
00156 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
00157 /// delete it.  This aborts if the value isn't in the table.
00158 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
00159   const char *VStr = (char*)V + ItemSize;
00160   StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
00161   (void)V2;
00162   assert(V == V2 && "Didn't find key?");
00163 }
00164 
00165 /// RemoveKey - Remove the StringMapEntry for the specified key from the
00166 /// table, returning it.  If the key is not in the table, this returns null.
00167 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
00168   int Bucket = FindKey(Key);
00169   if (Bucket == -1) return nullptr;
00170   
00171   StringMapEntryBase *Result = TheTable[Bucket];
00172   TheTable[Bucket] = getTombstoneVal();
00173   --NumItems;
00174   ++NumTombstones;
00175   assert(NumItems + NumTombstones <= NumBuckets);
00176 
00177   return Result;
00178 }
00179 
00180 
00181 
00182 /// RehashTable - Grow the table, redistributing values into the buckets with
00183 /// the appropriate mod-of-hashtable-size.
00184 unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
00185   unsigned NewSize;
00186   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
00187 
00188   // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
00189   // the buckets are empty (meaning that many are filled with tombstones),
00190   // grow/rehash the table.
00191   if (NumItems*4 > NumBuckets*3) {
00192     NewSize = NumBuckets*2;
00193   } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) {
00194     NewSize = NumBuckets;
00195   } else {
00196     return BucketNo;
00197   }
00198 
00199   unsigned NewBucketNo = BucketNo;
00200   // Allocate one extra bucket which will always be non-empty.  This allows the
00201   // iterators to stop at end.
00202   StringMapEntryBase **NewTableArray =
00203     (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
00204                                              sizeof(unsigned));
00205   unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
00206   NewTableArray[NewSize] = (StringMapEntryBase*)2;
00207 
00208   // Rehash all the items into their new buckets.  Luckily :) we already have
00209   // the hash values available, so we don't have to rehash any strings.
00210   for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
00211     StringMapEntryBase *Bucket = TheTable[I];
00212     if (Bucket && Bucket != getTombstoneVal()) {
00213       // Fast case, bucket available.
00214       unsigned FullHash = HashTable[I];
00215       unsigned NewBucket = FullHash & (NewSize-1);
00216       if (!NewTableArray[NewBucket]) {
00217         NewTableArray[FullHash & (NewSize-1)] = Bucket;
00218         NewHashArray[FullHash & (NewSize-1)] = FullHash;
00219         if (I == BucketNo)
00220           NewBucketNo = NewBucket;
00221         continue;
00222       }
00223       
00224       // Otherwise probe for a spot.
00225       unsigned ProbeSize = 1;
00226       do {
00227         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
00228       } while (NewTableArray[NewBucket]);
00229       
00230       // Finally found a slot.  Fill it in.
00231       NewTableArray[NewBucket] = Bucket;
00232       NewHashArray[NewBucket] = FullHash;
00233       if (I == BucketNo)
00234         NewBucketNo = NewBucket;
00235     }
00236   }
00237   
00238   free(TheTable);
00239   
00240   TheTable = NewTableArray;
00241   NumBuckets = NewSize;
00242   NumTombstones = 0;
00243   return NewBucketNo;
00244 }