clang: Preprocessor.cpp Source File

Go to the documentation of this file.
00001 //===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file implements the Preprocessor interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 //
00014 // Options to support:
00015 //   -H       - Print the name of each header file used.
00016 //   -d[DNI] - Dump various things.
00017 //   -fworking-directory - #line's with preprocessor's working dir.
00018 //   -fpreprocessed
00019 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
00020 //   -W*
00021 //   -w
00022 //
00023 // Messages to emit:
00024 //   "Multiple include guards may be useful for:\n"
00025 //
00026 //===----------------------------------------------------------------------===//
00027 
00028 #include "clang/Lex/Preprocessor.h"
00029 #include "clang/Basic/FileManager.h"
00030 #include "clang/Basic/FileSystemStatCache.h"
00031 #include "clang/Basic/SourceManager.h"
00032 #include "clang/Basic/TargetInfo.h"
00033 #include "clang/Lex/CodeCompletionHandler.h"
00034 #include "clang/Lex/ExternalPreprocessorSource.h"
00035 #include "clang/Lex/HeaderSearch.h"
00036 #include "clang/Lex/LexDiagnostic.h"
00037 #include "clang/Lex/LiteralSupport.h"
00038 #include "clang/Lex/MacroArgs.h"
00039 #include "clang/Lex/MacroInfo.h"
00040 #include "clang/Lex/ModuleLoader.h"
00041 #include "clang/Lex/Pragma.h"
00042 #include "clang/Lex/PreprocessingRecord.h"
00043 #include "clang/Lex/PreprocessorOptions.h"
00044 #include "clang/Lex/ScratchBuffer.h"
00045 #include "llvm/ADT/APFloat.h"
00046 #include "llvm/ADT/STLExtras.h"
00047 #include "llvm/ADT/SmallString.h"
00048 #include "llvm/ADT/StringExtras.h"
00049 #include "llvm/Support/Capacity.h"
00050 #include "llvm/Support/ConvertUTF.h"
00051 #include "llvm/Support/MemoryBuffer.h"
00052 #include "llvm/Support/raw_ostream.h"
00053 using namespace clang;
00054 
00055 //===----------------------------------------------------------------------===//
00056 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
00057 
00058 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
00059                            DiagnosticsEngine &diags, LangOptions &opts,
00060                            SourceManager &SM, HeaderSearch &Headers,
00061                            ModuleLoader &TheModuleLoader,
00062                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
00063                            TranslationUnitKind TUKind)
00064     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
00065       FileMgr(Headers.getFileMgr()), SourceMgr(SM),
00066       ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
00067       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
00068       Identifiers(opts, IILookup),
00069       PragmaHandlers(new PragmaNamespace(StringRef())),
00070       IncrementalProcessing(false), TUKind(TUKind),
00071       CodeComplete(nullptr), CodeCompletionFile(nullptr),
00072       CodeCompletionOffset(0), LastTokenWasAt(false),
00073       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
00074       SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
00075       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
00076       Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
00077       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
00078   OwnsHeaderSearch = OwnsHeaders;
00079   
00080   CounterValue = 0; // __COUNTER__ starts at 0.
00081   
00082   // Clear stats.
00083   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
00084   NumIf = NumElse = NumEndif = 0;
00085   NumEnteredSourceFiles = 0;
00086   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
00087   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
00088   MaxIncludeStackDepth = 0;
00089   NumSkipped = 0;
00090   
00091   // Default to discarding comments.
00092   KeepComments = false;
00093   KeepMacroComments = false;
00094   SuppressIncludeNotFoundError = false;
00095   
00096   // Macro expansion is enabled.
00097   DisableMacroExpansion = false;
00098   MacroExpansionInDirectivesOverride = false;
00099   InMacroArgs = false;
00100   InMacroArgPreExpansion = false;
00101   NumCachedTokenLexers = 0;
00102   PragmasEnabled = true;
00103   ParsingIfOrElifDirective = false;
00104   PreprocessedOutput = false;
00105 
00106   CachedLexPos = 0;
00107 
00108   // We haven't read anything from the external source.
00109   ReadMacrosFromExternalSource = false;
00110   
00111   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
00112   // This gets unpoisoned where it is allowed.
00113   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
00114   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
00115   
00116   // Initialize the pragma handlers.
00117   RegisterBuiltinPragmas();
00118   
00119   // Initialize builtin macros like __LINE__ and friends.
00120   RegisterBuiltinMacros();
00121   
00122   if(LangOpts.Borland) {
00123     Ident__exception_info        = getIdentifierInfo("_exception_info");
00124     Ident___exception_info       = getIdentifierInfo("__exception_info");
00125     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
00126     Ident__exception_code        = getIdentifierInfo("_exception_code");
00127     Ident___exception_code       = getIdentifierInfo("__exception_code");
00128     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
00129     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
00130     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
00131     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
00132   } else {
00133     Ident__exception_info = Ident__exception_code = nullptr;
00134     Ident__abnormal_termination = Ident___exception_info = nullptr;
00135     Ident___exception_code = Ident___abnormal_termination = nullptr;
00136     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
00137     Ident_AbnormalTermination = nullptr;
00138   }
00139 }
00140 
00141 Preprocessor::~Preprocessor() {
00142   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
00143 
00144   IncludeMacroStack.clear();
00145 
00146   // Destroy any macro definitions.
00147   while (MacroInfoChain *I = MIChainHead) {
00148     MIChainHead = I->Next;
00149     I->~MacroInfoChain();
00150   }
00151 
00152   // Free any cached macro expanders.
00153   // This populates MacroArgCache, so all TokenLexers need to be destroyed
00154   // before the code below that frees up the MacroArgCache list.
00155   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
00156   CurTokenLexer.reset();
00157 
00158   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
00159     DeserialMIChainHead = I->Next;
00160     I->~DeserializedMacroInfoChain();
00161   }
00162 
00163   // Free any cached MacroArgs.
00164   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
00165     ArgList = ArgList->deallocate();
00166 
00167   // Delete the header search info, if we own it.
00168   if (OwnsHeaderSearch)
00169     delete &HeaderInfo;
00170 }
00171 
00172 void Preprocessor::Initialize(const TargetInfo &Target) {
00173   assert((!this->Target || this->Target == &Target) &&
00174          "Invalid override of target information");
00175   this->Target = &Target;
00176   
00177   // Initialize information about built-ins.
00178   BuiltinInfo.InitializeTarget(Target);
00179   HeaderInfo.setTarget(Target);
00180 }
00181 
00182 void Preprocessor::InitializeForModelFile() {
00183   NumEnteredSourceFiles = 0;
00184 
00185   // Reset pragmas
00186   PragmaHandlersBackup = std::move(PragmaHandlers);
00187   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
00188   RegisterBuiltinPragmas();
00189 
00190   // Reset PredefinesFileID
00191   PredefinesFileID = FileID();
00192 }
00193 
00194 void Preprocessor::FinalizeForModelFile() {
00195   NumEnteredSourceFiles = 1;
00196 
00197   PragmaHandlers = std::move(PragmaHandlersBackup);
00198 }
00199 
00200 void Preprocessor::setPTHManager(PTHManager* pm) {
00201   PTH.reset(pm);
00202   FileMgr.addStatCache(PTH->createStatCache());
00203 }
00204 
00205 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
00206   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
00207                << getSpelling(Tok) << "'";
00208 
00209   if (!DumpFlags) return;
00210 
00211   llvm::errs() << "\t";
00212   if (Tok.isAtStartOfLine())
00213     llvm::errs() << " [StartOfLine]";
00214   if (Tok.hasLeadingSpace())
00215     llvm::errs() << " [LeadingSpace]";
00216   if (Tok.isExpandDisabled())
00217     llvm::errs() << " [ExpandDisabled]";
00218   if (Tok.needsCleaning()) {
00219     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
00220     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
00221                  << "']";
00222   }
00223 
00224   llvm::errs() << "\tLoc=<";
00225   DumpLocation(Tok.getLocation());
00226   llvm::errs() << ">";
00227 }
00228 
00229 void Preprocessor::DumpLocation(SourceLocation Loc) const {
00230   Loc.dump(SourceMgr);
00231 }
00232 
00233 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
00234   llvm::errs() << "MACRO: ";
00235   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
00236     DumpToken(MI.getReplacementToken(i));
00237     llvm::errs() << "  ";
00238   }
00239   llvm::errs() << "\n";
00240 }
00241 
00242 void Preprocessor::PrintStats() {
00243   llvm::errs() << "\n*** Preprocessor Stats:\n";
00244   llvm::errs() << NumDirectives << " directives found:\n";
00245   llvm::errs() << "  " << NumDefined << " #define.\n";
00246   llvm::errs() << "  " << NumUndefined << " #undef.\n";
00247   llvm::errs() << "  #include/#include_next/#import:\n";
00248   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
00249   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
00250   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
00251   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
00252   llvm::errs() << "  " << NumEndif << " #endif.\n";
00253   llvm::errs() << "  " << NumPragma << " #pragma.\n";
00254   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
00255 
00256   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
00257              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
00258              << NumFastMacroExpanded << " on the fast path.\n";
00259   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
00260              << " token paste (##) operations performed, "
00261              << NumFastTokenPaste << " on the fast path.\n";
00262 
00263   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
00264 
00265   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
00266   llvm::errs() << "\n  Macro Expanded Tokens: "
00267                << llvm::capacity_in_bytes(MacroExpandedTokens);
00268   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
00269   llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
00270   llvm::errs() << "\n  #pragma push_macro Info: "
00271                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
00272   llvm::errs() << "\n  Poison Reasons: "
00273                << llvm::capacity_in_bytes(PoisonReasons);
00274   llvm::errs() << "\n  Comment Handlers: "
00275                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
00276 }
00277 
00278 Preprocessor::macro_iterator
00279 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
00280   if (IncludeExternalMacros && ExternalSource &&
00281       !ReadMacrosFromExternalSource) {
00282     ReadMacrosFromExternalSource = true;
00283     ExternalSource->ReadDefinedMacros();
00284   }
00285 
00286   return Macros.begin();
00287 }
00288 
00289 size_t Preprocessor::getTotalMemory() const {
00290   return BP.getTotalMemory()
00291     + llvm::capacity_in_bytes(MacroExpandedTokens)
00292     + Predefines.capacity() /* Predefines buffer. */
00293     + llvm::capacity_in_bytes(Macros)
00294     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
00295     + llvm::capacity_in_bytes(PoisonReasons)
00296     + llvm::capacity_in_bytes(CommentHandlers);
00297 }
00298 
00299 Preprocessor::macro_iterator
00300 Preprocessor::macro_end(bool IncludeExternalMacros) const {
00301   if (IncludeExternalMacros && ExternalSource &&
00302       !ReadMacrosFromExternalSource) {
00303     ReadMacrosFromExternalSource = true;
00304     ExternalSource->ReadDefinedMacros();
00305   }
00306 
00307   return Macros.end();
00308 }
00309 
00310 /// \brief Compares macro tokens with a specified token value sequence.
00311 static bool MacroDefinitionEquals(const MacroInfo *MI,
00312                                   ArrayRef<TokenValue> Tokens) {
00313   return Tokens.size() == MI->getNumTokens() &&
00314       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
00315 }
00316 
00317 StringRef Preprocessor::getLastMacroWithSpelling(
00318                                     SourceLocation Loc,
00319                                     ArrayRef<TokenValue> Tokens) const {
00320   SourceLocation BestLocation;
00321   StringRef BestSpelling;
00322   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
00323        I != E; ++I) {
00324     if (!I->second->getMacroInfo()->isObjectLike())
00325       continue;
00326     const MacroDirective::DefInfo
00327       Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
00328     if (!Def)
00329       continue;
00330     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
00331       continue;
00332     SourceLocation Location = Def.getLocation();
00333     // Choose the macro defined latest.
00334     if (BestLocation.isInvalid() ||
00335         (Location.isValid() &&
00336          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
00337       BestLocation = Location;
00338       BestSpelling = I->first->getName();
00339     }
00340   }
00341   return BestSpelling;
00342 }
00343 
00344 void Preprocessor::recomputeCurLexerKind() {
00345   if (CurLexer)
00346     CurLexerKind = CLK_Lexer;
00347   else if (CurPTHLexer)
00348     CurLexerKind = CLK_PTHLexer;
00349   else if (CurTokenLexer)
00350     CurLexerKind = CLK_TokenLexer;
00351   else 
00352     CurLexerKind = CLK_CachingLexer;
00353 }
00354 
00355 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
00356                                           unsigned CompleteLine,
00357                                           unsigned CompleteColumn) {
00358   assert(File);
00359   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
00360   assert(!CodeCompletionFile && "Already set");
00361 
00362   using llvm::MemoryBuffer;
00363 
00364   // Load the actual file's contents.
00365   bool Invalid = false;
00366   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
00367   if (Invalid)
00368     return true;
00369 
00370   // Find the byte position of the truncation point.
00371   const char *Position = Buffer->getBufferStart();
00372   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
00373     for (; *Position; ++Position) {
00374       if (*Position != '\r' && *Position != '\n')
00375         continue;
00376 
00377       // Eat \r\n or \n\r as a single line.
00378       if ((Position[1] == '\r' || Position[1] == '\n') &&
00379           Position[0] != Position[1])
00380         ++Position;
00381       ++Position;
00382       break;
00383     }
00384   }
00385 
00386   Position += CompleteColumn - 1;
00387 
00388   // If pointing inside the preamble, adjust the position at the beginning of
00389   // the file after the preamble.
00390   if (SkipMainFilePreamble.first &&
00391       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
00392     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
00393       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
00394   }
00395 
00396   if (Position > Buffer->getBufferEnd())
00397     Position = Buffer->getBufferEnd();
00398 
00399   CodeCompletionFile = File;
00400   CodeCompletionOffset = Position - Buffer->getBufferStart();
00401 
00402   std::unique_ptr<MemoryBuffer> NewBuffer =
00403       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
00404                                           Buffer->getBufferIdentifier());
00405   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
00406   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
00407   *NewPos = '\0';
00408   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
00409   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
00410 
00411   return false;
00412 }
00413 
00414 void Preprocessor::CodeCompleteNaturalLanguage() {
00415   if (CodeComplete)
00416     CodeComplete->CodeCompleteNaturalLanguage();
00417   setCodeCompletionReached();
00418 }
00419 
00420 /// getSpelling - This method is used to get the spelling of a token into a
00421 /// SmallVector. Note that the returned StringRef may not point to the
00422 /// supplied buffer if a copy can be avoided.
00423 StringRef Preprocessor::getSpelling(const Token &Tok,
00424                                           SmallVectorImpl<char> &Buffer,
00425                                           bool *Invalid) const {
00426   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
00427   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
00428     // Try the fast path.
00429     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
00430       return II->getName();
00431   }
00432 
00433   // Resize the buffer if we need to copy into it.
00434   if (Tok.needsCleaning())
00435     Buffer.resize(Tok.getLength());
00436 
00437   const char *Ptr = Buffer.data();
00438   unsigned Len = getSpelling(Tok, Ptr, Invalid);
00439   return StringRef(Ptr, Len);
00440 }
00441 
00442 /// CreateString - Plop the specified string into a scratch buffer and return a
00443 /// location for it.  If specified, the source location provides a source
00444 /// location for the token.
00445 void Preprocessor::CreateString(StringRef Str, Token &Tok,
00446                                 SourceLocation ExpansionLocStart,
00447                                 SourceLocation ExpansionLocEnd) {
00448   Tok.setLength(Str.size());
00449 
00450   const char *DestPtr;
00451   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
00452 
00453   if (ExpansionLocStart.isValid())
00454     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
00455                                        ExpansionLocEnd, Str.size());
00456   Tok.setLocation(Loc);
00457 
00458   // If this is a raw identifier or a literal token, set the pointer data.
00459   if (Tok.is(tok::raw_identifier))
00460     Tok.setRawIdentifierData(DestPtr);
00461   else if (Tok.isLiteral())
00462     Tok.setLiteralData(DestPtr);
00463 }
00464 
00465 Module *Preprocessor::getCurrentModule() {
00466   if (getLangOpts().CurrentModule.empty())
00467     return nullptr;
00468 
00469   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
00470 }
00471 
00472 //===----------------------------------------------------------------------===//
00473 // Preprocessor Initialization Methods
00474 //===----------------------------------------------------------------------===//
00475 
00476 
00477 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
00478 /// which implicitly adds the builtin defines etc.
00479 void Preprocessor::EnterMainSourceFile() {
00480   // We do not allow the preprocessor to reenter the main file.  Doing so will
00481   // cause FileID's to accumulate information from both runs (e.g. #line
00482   // information) and predefined macros aren't guaranteed to be set properly.
00483   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
00484   FileID MainFileID = SourceMgr.getMainFileID();
00485 
00486   // If MainFileID is loaded it means we loaded an AST file, no need to enter
00487   // a main file.
00488   if (!SourceMgr.isLoadedFileID(MainFileID)) {
00489     // Enter the main file source buffer.
00490     EnterSourceFile(MainFileID, nullptr, SourceLocation());
00491 
00492     // If we've been asked to skip bytes in the main file (e.g., as part of a
00493     // precompiled preamble), do so now.
00494     if (SkipMainFilePreamble.first > 0)
00495       CurLexer->SkipBytes(SkipMainFilePreamble.first, 
00496                           SkipMainFilePreamble.second);
00497     
00498     // Tell the header info that the main file was entered.  If the file is later
00499     // #imported, it won't be re-entered.
00500     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
00501       HeaderInfo.IncrementIncludeCount(FE);
00502   }
00503 
00504   // Preprocess Predefines to populate the initial preprocessor state.
00505   std::unique_ptr<llvm::MemoryBuffer> SB =
00506     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
00507   assert(SB && "Cannot create predefined source buffer");
00508   FileID FID = SourceMgr.createFileID(std::move(SB));
00509   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
00510   setPredefinesFileID(FID);
00511 
00512   // Start parsing the predefines.
00513   EnterSourceFile(FID, nullptr, SourceLocation());
00514 }
00515 
00516 void Preprocessor::EndSourceFile() {
00517   // Notify the client that we reached the end of the source file.
00518   if (Callbacks)
00519     Callbacks->EndOfMainFile();
00520 }
00521 
00522 //===----------------------------------------------------------------------===//
00523 // Lexer Event Handling.
00524 //===----------------------------------------------------------------------===//
00525 
00526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
00527 /// identifier information for the token and install it into the token,
00528 /// updating the token kind accordingly.
00529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
00530   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
00531 
00532   // Look up this token, see if it is a macro, or if it is a language keyword.
00533   IdentifierInfo *II;
00534   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
00535     // No cleaning needed, just use the characters from the lexed buffer.
00536     II = getIdentifierInfo(Identifier.getRawIdentifier());
00537   } else {
00538     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
00539     SmallString<64> IdentifierBuffer;
00540     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
00541 
00542     if (Identifier.hasUCN()) {
00543       SmallString<64> UCNIdentifierBuffer;
00544       expandUCNs(UCNIdentifierBuffer, CleanedStr);
00545       II = getIdentifierInfo(UCNIdentifierBuffer);
00546     } else {
00547       II = getIdentifierInfo(CleanedStr);
00548     }
00549   }
00550 
00551   // Update the token info (identifier info and appropriate token kind).
00552   Identifier.setIdentifierInfo(II);
00553   Identifier.setKind(II->getTokenID());
00554 
00555   return II;
00556 }
00557 
00558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
00559   PoisonReasons[II] = DiagID;
00560 }
00561 
00562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
00563   assert(Ident__exception_code && Ident__exception_info);
00564   assert(Ident___exception_code && Ident___exception_info);
00565   Ident__exception_code->setIsPoisoned(Poison);
00566   Ident___exception_code->setIsPoisoned(Poison);
00567   Ident_GetExceptionCode->setIsPoisoned(Poison);
00568   Ident__exception_info->setIsPoisoned(Poison);
00569   Ident___exception_info->setIsPoisoned(Poison);
00570   Ident_GetExceptionInfo->setIsPoisoned(Poison);
00571   Ident__abnormal_termination->setIsPoisoned(Poison);
00572   Ident___abnormal_termination->setIsPoisoned(Poison);
00573   Ident_AbnormalTermination->setIsPoisoned(Poison);
00574 }
00575 
00576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
00577   assert(Identifier.getIdentifierInfo() &&
00578          "Can't handle identifiers without identifier info!");
00579   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
00580     PoisonReasons.find(Identifier.getIdentifierInfo());
00581   if(it == PoisonReasons.end())
00582     Diag(Identifier, diag::err_pp_used_poisoned_id);
00583   else
00584     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
00585 }
00586 
00587 /// HandleIdentifier - This callback is invoked when the lexer reads an
00588 /// identifier.  This callback looks up the identifier in the map and/or
00589 /// potentially macro expands it or turns it into a named token (like 'for').
00590 ///
00591 /// Note that callers of this method are guarded by checking the
00592 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
00593 /// IdentifierInfo methods that compute these properties will need to change to
00594 /// match.
00595 bool Preprocessor::HandleIdentifier(Token &Identifier) {
00596   assert(Identifier.getIdentifierInfo() &&
00597          "Can't handle identifiers without identifier info!");
00598 
00599   IdentifierInfo &II = *Identifier.getIdentifierInfo();
00600 
00601   // If the information about this identifier is out of date, update it from
00602   // the external source.
00603   // We have to treat __VA_ARGS__ in a special way, since it gets
00604   // serialized with isPoisoned = true, but our preprocessor may have
00605   // unpoisoned it if we're defining a C99 macro.
00606   if (II.isOutOfDate()) {
00607     bool CurrentIsPoisoned = false;
00608     if (&II == Ident__VA_ARGS__)
00609       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
00610 
00611     ExternalSource->updateOutOfDateIdentifier(II);
00612     Identifier.setKind(II.getTokenID());
00613 
00614     if (&II == Ident__VA_ARGS__)
00615       II.setIsPoisoned(CurrentIsPoisoned);
00616   }
00617   
00618   // If this identifier was poisoned, and if it was not produced from a macro
00619   // expansion, emit an error.
00620   if (II.isPoisoned() && CurPPLexer) {
00621     HandlePoisonedIdentifier(Identifier);
00622   }
00623 
00624   // If this is a macro to be expanded, do it.
00625   if (MacroDirective *MD = getMacroDirective(&II)) {
00626     MacroInfo *MI = MD->getMacroInfo();
00627     if (!DisableMacroExpansion) {
00628       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
00629         // C99 6.10.3p10: If the preprocessing token immediately after the
00630         // macro name isn't a '(', this macro should not be expanded.
00631         if (!MI->isFunctionLike() || isNextPPTokenLParen())
00632           return HandleMacroExpandedIdentifier(Identifier, MD);
00633       } else {
00634         // C99 6.10.3.4p2 says that a disabled macro may never again be
00635         // expanded, even if it's in a context where it could be expanded in the
00636         // future.
00637         Identifier.setFlag(Token::DisableExpand);
00638         if (MI->isObjectLike() || isNextPPTokenLParen())
00639           Diag(Identifier, diag::pp_disabled_macro_expansion);
00640       }
00641     }
00642   }
00643 
00644   // If this identifier is a keyword in C++11, produce a warning. Don't warn if
00645   // we're not considering macro expansion, since this identifier might be the
00646   // name of a macro.
00647   // FIXME: This warning is disabled in cases where it shouldn't be, like
00648   //   "#define constexpr constexpr", "int constexpr;"
00649   if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
00650     Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
00651     // Don't diagnose this keyword again in this translation unit.
00652     II.setIsCXX11CompatKeyword(false);
00653   }
00654 
00655   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
00656   // then we act as if it is the actual operator and not the textual
00657   // representation of it.
00658   if (II.isCPlusPlusOperatorKeyword())
00659     Identifier.setIdentifierInfo(nullptr);
00660 
00661   // If this is an extension token, diagnose its use.
00662   // We avoid diagnosing tokens that originate from macro definitions.
00663   // FIXME: This warning is disabled in cases where it shouldn't be,
00664   // like "#define TY typeof", "TY(1) x".
00665   if (II.isExtensionToken() && !DisableMacroExpansion)
00666     Diag(Identifier, diag::ext_token_used);
00667   
00668   // If this is the 'import' contextual keyword following an '@', note
00669   // that the next token indicates a module name.
00670   //
00671   // Note that we do not treat 'import' as a contextual
00672   // keyword when we're in a caching lexer, because caching lexers only get
00673   // used in contexts where import declarations are disallowed.
00674   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 
00675       !DisableMacroExpansion && getLangOpts().Modules && 
00676       CurLexerKind != CLK_CachingLexer) {
00677     ModuleImportLoc = Identifier.getLocation();
00678     ModuleImportPath.clear();
00679     ModuleImportExpectsIdentifier = true;
00680     CurLexerKind = CLK_LexAfterModuleImport;
00681   }
00682   return true;
00683 }
00684 
00685 void Preprocessor::Lex(Token &Result) {
00686   // We loop here until a lex function retuns a token; this avoids recursion.
00687   bool ReturnedToken;
00688   do {
00689     switch (CurLexerKind) {
00690     case CLK_Lexer:
00691       ReturnedToken = CurLexer->Lex(Result);
00692       break;
00693     case CLK_PTHLexer:
00694       ReturnedToken = CurPTHLexer->Lex(Result);
00695       break;
00696     case CLK_TokenLexer:
00697       ReturnedToken = CurTokenLexer->Lex(Result);
00698       break;
00699     case CLK_CachingLexer:
00700       CachingLex(Result);
00701       ReturnedToken = true;
00702       break;
00703     case CLK_LexAfterModuleImport:
00704       LexAfterModuleImport(Result);
00705       ReturnedToken = true;
00706       break;
00707     }
00708   } while (!ReturnedToken);
00709 
00710   LastTokenWasAt = Result.is(tok::at);
00711 }
00712 
00713 
00714 /// \brief Lex a token following the 'import' contextual keyword.
00715 ///
00716 void Preprocessor::LexAfterModuleImport(Token &Result) {
00717   // Figure out what kind of lexer we actually have.
00718   recomputeCurLexerKind();
00719   
00720   // Lex the next token.
00721   Lex(Result);
00722 
00723   // The token sequence 
00724   //
00725   //   import identifier (. identifier)*
00726   //
00727   // indicates a module import directive. We already saw the 'import' 
00728   // contextual keyword, so now we're looking for the identifiers.
00729   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
00730     // We expected to see an identifier here, and we did; continue handling
00731     // identifiers.
00732     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
00733                                               Result.getLocation()));
00734     ModuleImportExpectsIdentifier = false;
00735     CurLexerKind = CLK_LexAfterModuleImport;
00736     return;
00737   }
00738   
00739   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
00740   // see the next identifier.
00741   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
00742     ModuleImportExpectsIdentifier = true;
00743     CurLexerKind = CLK_LexAfterModuleImport;
00744     return;
00745   }
00746 
00747   // If we have a non-empty module path, load the named module.
00748   if (!ModuleImportPath.empty() && getLangOpts().Modules) {
00749     Module *Imported = TheModuleLoader.loadModule(ModuleImportLoc,
00750                                                   ModuleImportPath,
00751                                                   Module::MacrosVisible,
00752                                                   /*IsIncludeDirective=*/false);
00753     if (Callbacks)
00754       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
00755   }
00756 }
00757 
00758 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
00759                                           const char *DiagnosticTag,
00760                                           bool AllowMacroExpansion) {
00761   // We need at least one string literal.
00762   if (Result.isNot(tok::string_literal)) {
00763     Diag(Result, diag::err_expected_string_literal)
00764       << /*Source='in...'*/0 << DiagnosticTag;
00765     return false;
00766   }
00767 
00768   // Lex string literal tokens, optionally with macro expansion.
00769   SmallVector<Token, 4> StrToks;
00770   do {
00771     StrToks.push_back(Result);
00772 
00773     if (Result.hasUDSuffix())
00774       Diag(Result, diag::err_invalid_string_udl);
00775 
00776     if (AllowMacroExpansion)
00777       Lex(Result);
00778     else
00779       LexUnexpandedToken(Result);
00780   } while (Result.is(tok::string_literal));
00781 
00782   // Concatenate and parse the strings.
00783   StringLiteralParser Literal(StrToks, *this);
00784   assert(Literal.isAscii() && "Didn't allow wide strings in");
00785 
00786   if (Literal.hadError)
00787     return false;
00788 
00789   if (Literal.Pascal) {
00790     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
00791       << /*Source='in...'*/0 << DiagnosticTag;
00792     return false;
00793   }
00794 
00795   String = Literal.GetString();
00796   return true;
00797 }
00798 
00799 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
00800   assert(Tok.is(tok::numeric_constant));
00801   SmallString<8> IntegerBuffer;
00802   bool NumberInvalid = false;
00803   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
00804   if (NumberInvalid)
00805     return false;
00806   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
00807   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
00808     return false;
00809   llvm::APInt APVal(64, 0);
00810   if (Literal.GetIntegerValue(APVal))
00811     return false;
00812   Lex(Tok);
00813   Value = APVal.getLimitedValue();
00814   return true;
00815 }
00816 
00817 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
00818   assert(Handler && "NULL comment handler");
00819   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
00820          CommentHandlers.end() && "Comment handler already registered");
00821   CommentHandlers.push_back(Handler);
00822 }
00823 
00824 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
00825   std::vector<CommentHandler *>::iterator Pos
00826   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
00827   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
00828   CommentHandlers.erase(Pos);
00829 }
00830 
00831 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
00832   bool AnyPendingTokens = false;
00833   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
00834        HEnd = CommentHandlers.end();
00835        H != HEnd; ++H) {
00836     if ((*H)->HandleComment(*this, Comment))
00837       AnyPendingTokens = true;
00838   }
00839   if (!AnyPendingTokens || getCommentRetentionState())
00840     return false;
00841   Lex(result);
00842   return true;
00843 }
00844 
00845 ModuleLoader::~ModuleLoader() { }
00846 
00847 CommentHandler::~CommentHandler() { }
00848 
00849 CodeCompletionHandler::~CodeCompletionHandler() { }
00850 
00851 void Preprocessor::createPreprocessingRecord() {
00852   if (Record)
00853     return;
00854   
00855   Record = new PreprocessingRecord(getSourceManager());
00856   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
00857 }