clang: PTHLexer.cpp Source File

Go to the documentation of this file.
00001 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the PTHLexer interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "clang/Lex/PTHLexer.h"
00015 #include "clang/Basic/FileManager.h"
00016 #include "clang/Basic/FileSystemStatCache.h"
00017 #include "clang/Basic/IdentifierTable.h"
00018 #include "clang/Basic/TokenKinds.h"
00019 #include "clang/Lex/LexDiagnostic.h"
00020 #include "clang/Lex/PTHManager.h"
00021 #include "clang/Lex/Preprocessor.h"
00022 #include "clang/Lex/Token.h"
00023 #include "llvm/ADT/StringExtras.h"
00024 #include "llvm/ADT/StringMap.h"
00025 #include "llvm/Support/EndianStream.h"
00026 #include "llvm/Support/MemoryBuffer.h"
00027 #include <memory>
00028 #include <system_error>
00029 using namespace clang;
00030 
/// Size in bytes of one serialized token record: a 1-byte token kind, a
/// 1-byte flag set, a 2-byte length, and two 4-byte words (the
/// identifier/spelling reference and the file offset).  PTHLexer::Lex unpacks
/// the fields in this order.
static const unsigned StoredTokenSize =
    sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint16_t) + 2 * sizeof(uint32_t);
00032 
00033 //===----------------------------------------------------------------------===//
00034 // PTHLexer methods.
00035 //===----------------------------------------------------------------------===//
00036 
00037 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
00038                    const unsigned char *ppcond, PTHManager &PM)
00039   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
00040     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
00041 
00042   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
00043 }
00044 
00045 bool PTHLexer::Lex(Token& Tok) {
00046   //===--------------------------------------==//
00047   // Read the raw token data.
00048   //===--------------------------------------==//
00049   using namespace llvm::support;
00050 
00051   // Shadow CurPtr into an automatic variable.
00052   const unsigned char *CurPtrShadow = CurPtr;
00053 
00054   // Read in the data for the token.
00055   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
00056   uint32_t IdentifierID =
00057       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
00058   uint32_t FileOffset =
00059       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
00060 
00061   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
00062   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
00063   uint32_t Len = Word0 >> 16;
00064 
00065   CurPtr = CurPtrShadow;
00066 
00067   //===--------------------------------------==//
00068   // Construct the token itself.
00069   //===--------------------------------------==//
00070 
00071   Tok.startToken();
00072   Tok.setKind(TKind);
00073   Tok.setFlag(TFlags);
00074   assert(!LexingRawMode);
00075   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
00076   Tok.setLength(Len);
00077 
00078   // Handle identifiers.
00079   if (Tok.isLiteral()) {
00080     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
00081   }
00082   else if (IdentifierID) {
00083     MIOpt.ReadToken();
00084     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
00085 
00086     Tok.setIdentifierInfo(II);
00087 
00088     // Change the kind of this identifier to the appropriate token kind, e.g.
00089     // turning "for" into a keyword.
00090     Tok.setKind(II->getTokenID());
00091 
00092     if (II->isHandleIdentifierCase())
00093       return PP->HandleIdentifier(Tok);
00094 
00095     return true;
00096   }
00097 
00098   //===--------------------------------------==//
00099   // Process the token.
00100   //===--------------------------------------==//
00101   if (TKind == tok::eof) {
00102     // Save the end-of-file token.
00103     EofToken = Tok;
00104 
00105     assert(!ParsingPreprocessorDirective);
00106     assert(!LexingRawMode);
00107 
00108     return LexEndOfFile(Tok);
00109   }
00110 
00111   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
00112     LastHashTokPtr = CurPtr - StoredTokenSize;
00113     assert(!LexingRawMode);
00114     PP->HandleDirective(Tok);
00115 
00116     return false;
00117   }
00118 
00119   if (TKind == tok::eod) {
00120     assert(ParsingPreprocessorDirective);
00121     ParsingPreprocessorDirective = false;
00122     return true;
00123   }
00124 
00125   MIOpt.ReadToken();
00126   return true;
00127 }
00128 
00129 bool PTHLexer::LexEndOfFile(Token &Result) {
00130   // If we hit the end of the file while parsing a preprocessor directive,
00131   // end the preprocessor directive first.  The next token returned will
00132   // then be the end of file.
00133   if (ParsingPreprocessorDirective) {
00134     ParsingPreprocessorDirective = false; // Done parsing the "line".
00135     return true;  // Have a token.
00136   }
00137   
00138   assert(!LexingRawMode);
00139 
00140   // If we are in a #if directive, emit an error.
00141   while (!ConditionalStack.empty()) {
00142     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
00143       PP->Diag(ConditionalStack.back().IfLoc,
00144                diag::err_pp_unterminated_conditional);
00145     ConditionalStack.pop_back();
00146   }
00147 
00148   // Finally, let the preprocessor handle this.
00149   return PP->HandleEndOfFile(Result);
00150 }
00151 
00152 // FIXME: We can just grab the last token instead of storing a copy
00153 // into EofToken.
00154 void PTHLexer::getEOF(Token& Tok) {
00155   assert(EofToken.is(tok::eof));
00156   Tok = EofToken;
00157 }
00158 
00159 void PTHLexer::DiscardToEndOfLine() {
00160   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
00161          "Must be in a preprocessing directive!");
00162 
00163   // We assume that if the preprocessor wishes to discard to the end of
00164   // the line that it also means to end the current preprocessor directive.
00165   ParsingPreprocessorDirective = false;
00166 
00167   // Skip tokens by only peeking at their token kind and the flags.
00168   // We don't need to actually reconstruct full tokens from the token buffer.
00169   // This saves some copies and it also reduces IdentifierInfo* lookup.
00170   const unsigned char* p = CurPtr;
00171   while (1) {
00172     // Read the token kind.  Are we at the end of the file?
00173     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
00174     if (x == tok::eof) break;
00175 
00176     // Read the token flags.  Are we at the start of the next line?
00177     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
00178     if (y & Token::StartOfLine) break;
00179 
00180     // Skip to the next token.
00181     p += StoredTokenSize;
00182   }
00183 
00184   CurPtr = p;
00185 }
00186 
00187 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
00188 bool PTHLexer::SkipBlock() {
00189   using namespace llvm::support;
00190   assert(CurPPCondPtr && "No cached PP conditional information.");
00191   assert(LastHashTokPtr && "No known '#' token.");
00192 
00193   const unsigned char *HashEntryI = nullptr;
00194   uint32_t TableIdx;
00195 
00196   do {
00197     // Read the token offset from the side-table.
00198     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
00199 
00200     // Read the target table index from the side-table.
00201     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
00202 
00203     // Compute the actual memory address of the '#' token data for this entry.
00204     HashEntryI = TokBuf + Offset;
00205 
00206     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
00207     //  contain nested blocks.  In the side-table we can jump over these
00208     //  nested blocks instead of doing a linear search if the next "sibling"
00209     //  entry is not at a location greater than LastHashTokPtr.
00210     if (HashEntryI < LastHashTokPtr && TableIdx) {
00211       // In the side-table we are still at an entry for a '#' token that
00212       // is earlier than the last one we saw.  Check if the location we would
00213       // stride gets us closer.
00214       const unsigned char* NextPPCondPtr =
00215         PPCond + TableIdx*(sizeof(uint32_t)*2);
00216       assert(NextPPCondPtr >= CurPPCondPtr);
00217       // Read where we should jump to.
00218       const unsigned char *HashEntryJ =
00219           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
00220 
00221       if (HashEntryJ <= LastHashTokPtr) {
00222         // Jump directly to the next entry in the side table.
00223         HashEntryI = HashEntryJ;
00224         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
00225         CurPPCondPtr = NextPPCondPtr;
00226       }
00227     }
00228   }
00229   while (HashEntryI < LastHashTokPtr);
00230   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
00231   assert(TableIdx && "No jumping from #endifs.");
00232 
00233   // Update our side-table iterator.
00234   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
00235   assert(NextPPCondPtr >= CurPPCondPtr);
00236   CurPPCondPtr = NextPPCondPtr;
00237 
00238   // Read where we should jump to.
00239   HashEntryI =
00240       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
00241   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
00242 
00243   // By construction NextIdx will be zero if this is a #endif.  This is useful
00244   // to know to obviate lexing another token.
00245   bool isEndif = NextIdx == 0;
00246 
00247   // This case can occur when we see something like this:
00248   //
00249   //  #if ...
00250   //   /* a comment or nothing */
00251   //  #elif
00252   //
00253   // If we are skipping the first #if block it will be the case that CurPtr
00254   // already points 'elif'.  Just return.
00255 
00256   if (CurPtr > HashEntryI) {
00257     assert(CurPtr == HashEntryI + StoredTokenSize);
00258     // Did we reach a #endif?  If so, go ahead and consume that token as well.
00259     if (isEndif)
00260       CurPtr += StoredTokenSize * 2;
00261     else
00262       LastHashTokPtr = HashEntryI;
00263 
00264     return isEndif;
00265   }
00266 
00267   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
00268   CurPtr = HashEntryI;
00269 
00270   // Update the location of the last observed '#'.  This is useful if we
00271   // are skipping multiple blocks.
00272   LastHashTokPtr = CurPtr;
00273 
00274   // Skip the '#' token.
00275   assert(((tok::TokenKind)*CurPtr) == tok::hash);
00276   CurPtr += StoredTokenSize;
00277 
00278   // Did we reach a #endif?  If so, go ahead and consume that token as well.
00279   if (isEndif) {
00280     CurPtr += StoredTokenSize * 2;
00281   }
00282 
00283   return isEndif;
00284 }
00285 
00286 SourceLocation PTHLexer::getSourceLocation() {
00287   // getSourceLocation is not on the hot path.  It is used to get the location
00288   // of the next token when transitioning back to this lexer when done
00289   // handling a #included file.  Just read the necessary data from the token
00290   // data buffer to construct the SourceLocation object.
00291   // NOTE: This is a virtual function; hence it is defined out-of-line.
00292   using namespace llvm::support;
00293 
00294   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
00295   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
00296   return FileStartLoc.getLocWithOffset(Offset);
00297 }
00298 
00299 //===----------------------------------------------------------------------===//
00300 // PTH file lookup: map from strings to file data.
00301 //===----------------------------------------------------------------------===//
00302 
00303 /// PTHFileLookup - This internal data structure is used by the PTHManager
00304 ///  to map from FileEntry objects managed by FileManager to offsets within
00305 ///  the PTH file.
00306 namespace {
/// Immutable pair of offsets describing where one file's cached data lives:
/// the offset of its token data and the offset of its preprocessor-conditional
/// table.  PTHManager::CreateLexer adds both to the start of the PTH buffer.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenOff(tokenOff), PPCondOff(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const { return TokenOff; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return PPCondOff; }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
00317 
00318 
00319 class PTHFileLookupCommonTrait {
00320 public:
00321   typedef std::pair<unsigned char, const char*> internal_key_type;
00322   typedef unsigned hash_value_type;
00323   typedef unsigned offset_type;
00324 
00325   static hash_value_type ComputeHash(internal_key_type x) {
00326     return llvm::HashString(x.second);
00327   }
00328 
00329   static std::pair<unsigned, unsigned>
00330   ReadKeyDataLength(const unsigned char*& d) {
00331     using namespace llvm::support;
00332     unsigned keyLen =
00333         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
00334     unsigned dataLen = (unsigned) *(d++);
00335     return std::make_pair(keyLen, dataLen);
00336   }
00337 
00338   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
00339     unsigned char k = *(d++); // Read the entry kind.
00340     return std::make_pair(k, (const char*) d);
00341   }
00342 };
00343 
00344 } // end anonymous namespace
00345 
00346 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
00347 public:
00348   typedef const FileEntry* external_key_type;
00349   typedef PTHFileData      data_type;
00350 
00351   static internal_key_type GetInternalKey(const FileEntry* FE) {
00352     return std::make_pair((unsigned char) 0x1, FE->getName());
00353   }
00354 
00355   static bool EqualKey(internal_key_type a, internal_key_type b) {
00356     return a.first == b.first && strcmp(a.second, b.second) == 0;
00357   }
00358 
00359   static PTHFileData ReadData(const internal_key_type& k,
00360                               const unsigned char* d, unsigned) {
00361     assert(k.first == 0x1 && "Only file lookups can match!");
00362     using namespace llvm::support;
00363     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
00364     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
00365     return PTHFileData(x, y);
00366   }
00367 };
00368 
00369 class PTHManager::PTHStringLookupTrait {
00370 public:
00371   typedef uint32_t data_type;
00372   typedef const std::pair<const char*, unsigned> external_key_type;
00373   typedef external_key_type internal_key_type;
00374   typedef uint32_t hash_value_type;
00375   typedef unsigned offset_type;
00376 
00377   static bool EqualKey(const internal_key_type& a,
00378                        const internal_key_type& b) {
00379     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
00380                                   : false;
00381   }
00382 
00383   static hash_value_type ComputeHash(const internal_key_type& a) {
00384     return llvm::HashString(StringRef(a.first, a.second));
00385   }
00386 
00387   // This hopefully will just get inlined and removed by the optimizer.
00388   static const internal_key_type&
00389   GetInternalKey(const external_key_type& x) { return x; }
00390 
00391   static std::pair<unsigned, unsigned>
00392   ReadKeyDataLength(const unsigned char*& d) {
00393     using namespace llvm::support;
00394     return std::make_pair(
00395         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
00396         sizeof(uint32_t));
00397   }
00398 
00399   static std::pair<const char*, unsigned>
00400   ReadKey(const unsigned char* d, unsigned n) {
00401       assert(n >= 2 && d[n-1] == '\0');
00402       return std::make_pair((const char*) d, n-1);
00403     }
00404 
00405   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
00406                            unsigned) {
00407     using namespace llvm::support;
00408     return endian::readNext<uint32_t, little, unaligned>(d);
00409   }
00410 };
00411 
00412 //===----------------------------------------------------------------------===//
00413 // PTHManager methods.
00414 //===----------------------------------------------------------------------===//
00415 
00416 PTHManager::PTHManager(
00417     std::unique_ptr<const llvm::MemoryBuffer> buf,
00418     std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
00419     std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
00420     std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
00421     const unsigned char *spellingBase, const char *originalSourceFile)
00422     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
00423       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
00424       StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
00425       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
00426 
00427 PTHManager::~PTHManager() {
00428 }
00429 
00430 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
00431   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
00432 }
00433 
00434 PTHManager *PTHManager::Create(const std::string &file,
00435                                DiagnosticsEngine &Diags) {
00436   // Memory map the PTH file.
00437   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
00438       llvm::MemoryBuffer::getFile(file);
00439 
00440   if (!FileOrErr) {
00441     // FIXME: Add ec.message() to this diag.
00442     Diags.Report(diag::err_invalid_pth_file) << file;
00443     return nullptr;
00444   }
00445   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
00446 
00447   using namespace llvm::support;
00448 
00449   // Get the buffer ranges and check if there are at least three 32-bit
00450   // words at the end of the file.
00451   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
00452   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
00453 
00454   // Check the prologue of the file.
00455   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
00456       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
00457     Diags.Report(diag::err_invalid_pth_file) << file;
00458     return nullptr;
00459   }
00460 
00461   // Read the PTH version.
00462   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
00463   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
00464 
00465   if (Version < PTHManager::Version) {
00466     InvalidPTH(Diags,
00467         Version < PTHManager::Version
00468         ? "PTH file uses an older PTH format that is no longer supported"
00469         : "PTH file uses a newer PTH format that cannot be read");
00470     return nullptr;
00471   }
00472 
00473   // Compute the address of the index table at the end of the PTH file.
00474   const unsigned char *PrologueOffset = p;
00475 
00476   if (PrologueOffset >= BufEnd) {
00477     Diags.Report(diag::err_invalid_pth_file) << file;
00478     return nullptr;
00479   }
00480 
00481   // Construct the file lookup table.  This will be used for mapping from
00482   // FileEntry*'s to cached tokens.
00483   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
00484   const unsigned char *FileTable =
00485       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
00486 
00487   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
00488     Diags.Report(diag::err_invalid_pth_file) << file;
00489     return nullptr; // FIXME: Proper error diagnostic?
00490   }
00491 
00492   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
00493 
00494   // Warn if the PTH file is empty.  We still want to create a PTHManager
00495   // as the PTH could be used with -include-pth.
00496   if (FL->isEmpty())
00497     InvalidPTH(Diags, "PTH file contains no cached source data");
00498 
00499   // Get the location of the table mapping from persistent ids to the
00500   // data needed to reconstruct identifiers.
00501   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
00502   const unsigned char *IData =
00503       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
00504 
00505   if (!(IData >= BufBeg && IData < BufEnd)) {
00506     Diags.Report(diag::err_invalid_pth_file) << file;
00507     return nullptr;
00508   }
00509 
00510   // Get the location of the hashtable mapping between strings and
00511   // persistent IDs.
00512   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
00513   const unsigned char *StringIdTable =
00514       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
00515   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
00516     Diags.Report(diag::err_invalid_pth_file) << file;
00517     return nullptr;
00518   }
00519 
00520   std::unique_ptr<PTHStringIdLookup> SL(
00521       PTHStringIdLookup::Create(StringIdTable, BufBeg));
00522 
00523   // Get the location of the spelling cache.
00524   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
00525   const unsigned char *spellingBase =
00526       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
00527   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
00528     Diags.Report(diag::err_invalid_pth_file) << file;
00529     return nullptr;
00530   }
00531 
00532   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
00533   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
00534 
00535   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
00536   // so that we in the best case only zero out memory once when the OS returns
00537   // us new pages.
00538   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
00539 
00540   if (NumIds) {
00541     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
00542     if (!PerIDCache) {
00543       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
00544       return nullptr;
00545     }
00546   }
00547 
00548   // Compute the address of the original source file.
00549   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
00550   unsigned len =
00551       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
00552   if (!len) originalSourceBase = nullptr;
00553 
00554   // Create the new PTHManager.
00555   return new PTHManager(std::move(File), std::move(FL), IData,
00556                         std::move(PerIDCache), std::move(SL), NumIds,
00557                         spellingBase, (const char *)originalSourceBase);
00558 }
00559 
00560 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
00561   using namespace llvm::support;
00562   // Look in the PTH file for the string data for the IdentifierInfo object.
00563   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
00564   const unsigned char *IDData =
00565       (const unsigned char *)Buf->getBufferStart() +
00566       endian::readNext<uint32_t, little, aligned>(TableEntry);
00567   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
00568 
00569   // Allocate the object.
00570   std::pair<IdentifierInfo,const unsigned char*> *Mem =
00571     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
00572 
00573   Mem->second = IDData;
00574   assert(IDData[0] != '\0');
00575   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
00576 
00577   // Store the new IdentifierInfo in the cache.
00578   PerIDCache[PersistentID] = II;
00579   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
00580   return II;
00581 }
00582 
00583 IdentifierInfo* PTHManager::get(StringRef Name) {
00584   // Double check our assumption that the last character isn't '\0'.
00585   assert(Name.empty() || Name.back() != '\0');
00586   PTHStringIdLookup::iterator I =
00587       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
00588   if (I == StringIdLookup->end()) // No identifier found?
00589     return nullptr;
00590 
00591   // Match found.  Return the identifier!
00592   assert(*I > 0);
00593   return GetIdentifierInfo(*I-1);
00594 }
00595 
00596 PTHLexer *PTHManager::CreateLexer(FileID FID) {
00597   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
00598   if (!FE)
00599     return nullptr;
00600 
00601   using namespace llvm::support;
00602 
00603   // Lookup the FileEntry object in our file lookup data structure.  It will
00604   // return a variant that indicates whether or not there is an offset within
00605   // the PTH file that contains cached tokens.
00606   PTHFileLookup::iterator I = FileLookup->find(FE);
00607 
00608   if (I == FileLookup->end()) // No tokens available?
00609     return nullptr;
00610 
00611   const PTHFileData& FileData = *I;
00612 
00613   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
00614   // Compute the offset of the token data within the buffer.
00615   const unsigned char* data = BufStart + FileData.getTokenOffset();
00616 
00617   // Get the location of pp-conditional table.
00618   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
00619   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
00620   if (Len == 0) ppcond = nullptr;
00621 
00622   assert(PP && "No preprocessor set yet!");
00623   return new PTHLexer(*PP, FID, data, ppcond, *this);
00624 }
00625 
00626 //===----------------------------------------------------------------------===//
00627 // 'stat' caching.
00628 //===----------------------------------------------------------------------===//
00629 
00630 namespace {
00631 class PTHStatData {
00632 public:
00633   const bool HasData;
00634   uint64_t Size;
00635   time_t ModTime;
00636   llvm::sys::fs::UniqueID UniqueID;
00637   bool IsDirectory;
00638 
00639   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
00640               bool IsDirectory)
00641       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
00642         IsDirectory(IsDirectory) {}
00643 
00644   PTHStatData() : HasData(false) {}
00645 };
00646 
00647 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
00648 public:
00649   typedef const char* external_key_type;  // const char*
00650   typedef PTHStatData data_type;
00651 
00652   static internal_key_type GetInternalKey(const char *path) {
00653     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
00654     return std::make_pair((unsigned char) 0x0, path);
00655   }
00656 
00657   static bool EqualKey(internal_key_type a, internal_key_type b) {
00658     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
00659     // just the paths.
00660     return strcmp(a.second, b.second) == 0;
00661   }
00662 
00663   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
00664                             unsigned) {
00665 
00666     if (k.first /* File or Directory */) {
00667       bool IsDirectory = true;
00668       if (k.first == 0x1 /* File */) {
00669         IsDirectory = false;
00670         d += 4 * 2; // Skip the first 2 words.
00671       }
00672 
00673       using namespace llvm::support;
00674 
00675       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
00676       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
00677       llvm::sys::fs::UniqueID UniqueID(Device, File);
00678       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
00679       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
00680       return data_type(Size, ModTime, UniqueID, IsDirectory);
00681     }
00682 
00683     // Negative stat.  Don't read anything.
00684     return data_type();
00685   }
00686 };
00687 } // end anonymous namespace
00688 
00689 namespace clang {
00690 class PTHStatCache : public FileSystemStatCache {
00691   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
00692   CacheTy Cache;
00693 
00694 public:
00695   PTHStatCache(PTHManager::PTHFileLookup &FL)
00696       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
00697               FL.getBase()) {}
00698 
00699   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
00700                        std::unique_ptr<vfs::File> *F,
00701                        vfs::FileSystem &FS) override {
00702     // Do the lookup for the file's data in the PTH file.
00703     CacheTy::iterator I = Cache.find(Path);
00704 
00705     // If we don't get a hit in the PTH file just forward to 'stat'.
00706     if (I == Cache.end())
00707       return statChained(Path, Data, isFile, F, FS);
00708 
00709     const PTHStatData &D = *I;
00710 
00711     if (!D.HasData)
00712       return CacheMissing;
00713 
00714     Data.Name = Path;
00715     Data.Size = D.Size;
00716     Data.ModTime = D.ModTime;
00717     Data.UniqueID = D.UniqueID;
00718     Data.IsDirectory = D.IsDirectory;
00719     Data.IsNamedPipe = false;
00720     Data.InPCH = true;
00721 
00722     return CacheExists;
00723   }
00724 };
00725 }
00726 
00727 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
00728   return llvm::make_unique<PTHStatCache>(*FileLookup);
00729 }